diff options
author | David Robertson <davidr@element.io> | 2022-07-26 18:58:47 +0100 |
---|---|---|
committer | David Robertson <davidr@element.io> | 2022-07-26 18:58:47 +0100 |
commit | 49fa59e3bbf0dcc12d414f5b088cd0a3dc0ac972 (patch) | |
tree | ddb97466b669dd5235589de55f82f5f77da7465a | |
parent | Revert debug changes to Synapse (diff) | |
parent | Additional fixes for opentracing type hints. (#13362) (diff) | |
download | synapse-49fa59e3bbf0dcc12d414f5b088cd0a3dc0ac972.tar.xz |
Merge remote-tracking branch 'origin/develop' into dmr/stateres/debug
364 files changed, 9203 insertions, 4428 deletions
diff --git a/.ci/scripts/setup_complement_prerequisites.sh b/.ci/scripts/setup_complement_prerequisites.sh new file mode 100755 index 0000000000..4848901cbf --- /dev/null +++ b/.ci/scripts/setup_complement_prerequisites.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Common commands to set up Complement's prerequisites in a GitHub Actions CI run. +# +# Must be called after Synapse has been checked out to `synapse/`. +# +set -eu + +alias block='{ set +x; } 2>/dev/null; func() { echo "::group::$*"; set -x; }; func' +alias endblock='{ set +x; } 2>/dev/null; func() { echo "::endgroup::"; set -x; }; func' + +block Set Go Version + # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. + # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path + + # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 + echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH + # Add the Go path to the PATH: We need this so we can call gotestfmt + echo "~/go/bin" >> $GITHUB_PATH +endblock + +block Install Complement Dependencies + sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev + go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest +endblock + +block Install custom gotestfmt template + mkdir .gotestfmt/github -p + cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl +endblock + +block Check out Complement + # Attempt to check out the same branch of Complement as the PR. If it + # doesn't exist, fallback to HEAD. + synapse/.ci/scripts/checkout_complement.sh +endblock diff --git a/.ci/scripts/test_old_deps.sh b/.ci/scripts/test_old_deps.sh index 769ca4517e..478c8d639a 100755 --- a/.ci/scripts/test_old_deps.sh +++ b/.ci/scripts/test_old_deps.sh @@ -27,9 +27,10 @@ export VIRTUALENV_NO_DOWNLOAD=1 # Patch the project definitions in-place: # - Replace all lower and tilde bounds with exact bounds -# - Make the pyopenssl 17.0, which is the oldest version that works with -# a `cryptography` compiled against OpenSSL 1.1. +# - Replace all caret bounds---but not the one that defines the supported Python version! # - Delete all lines referring to psycopg2 --- so no testing of postgres support. +# - Use pyopenssl 17.0, which is the oldest version that works with +# a `cryptography` compiled against OpenSSL 1.1. # - Omit systemd: we're not logging to journal here. # TODO: also replace caret bounds, see https://python-poetry.org/docs/dependency-specification/#version-constraints @@ -40,6 +41,7 @@ export VIRTUALENV_NO_DOWNLOAD=1 sed -i \ -e "s/[~>]=/==/g" \ + -e '/^python = "^/!s/\^/==/g' \ -e "/psycopg2/d" \ -e 's/pyOpenSSL = "==16.0.0"/pyOpenSSL = "==17.0.0"/' \ -e '/systemd/d' \ @@ -67,7 +69,7 @@ with open('pyproject.toml', 'w') as f: " python3 -c "$REMOVE_DEV_DEPENDENCIES" -pipx install poetry==1.1.12 +pipx install poetry==1.1.14 ~/.local/bin/poetry lock echo "::group::Patched pyproject.toml" diff --git a/.editorconfig b/.editorconfig index 3edf9e717c..d629bede5e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,3 +7,4 @@ root = true [*.py] indent_style = space indent_size = 4 +max_line_length = 88 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 50d28c68ee..c3638c35eb 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,3 +1,16 @@ +# Commits in this file will be removed from GitHub blame results. +# +# To use this file locally, use: +# git blame --ignore-revs-file="path/to/.git-blame-ignore-revs" <files> +# +# or configure the `blame.ignoreRevsFile` option in your git config. +# +# If ignoring a pull request that was not squash merged, only the merge +# commit needs to be put here. Child commits will be resolved from it. + +# Run black (#3679). +8b3d9b6b199abb87246f982d5db356f1966db925 + # Black reformatting (#5482). 32e7c9e7f20b57dd081023ac42d6931a8da9b3a3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2e4ee723d3..c8b033e8a4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -329,84 +329,41 @@ jobs: database: Postgres steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh - run: | set -o pipefail - POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt shell: bash name: Run Complement Tests - # We only run the workers tests on `develop` for now, because they're too slow to wait for on PRs. - # Sadly, you can't have an `if` condition on the value of a matrix, so this is a temporary, separate job for now. - # GitHub Actions doesn't support YAML anchors, so it's full-on duplication for now. - complement-developonly: - if: "${{ !failure() && !cancelled() && (github.ref == 'refs/heads/develop') }}" + # XXX When complement with workers is stable, move this back into the standard + # "complement" matrix above. + # + # See https://github.com/matrix-org/synapse/issues/13161 + complement-workers: + if: "${{ !failure() && !cancelled() }}" needs: linting-done runs-on: ubuntu-latest - - name: "Complement Workers (develop only)" steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh - run: | set -o pipefail - WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + POSTGRES=1 WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt shell: bash name: Run Complement Tests diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 5f0671f350..dd8e6fbb1c 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -96,6 +96,51 @@ jobs: /logs/results.tap /logs/**/*.log* + complement: + if: "${{ !failure() && !cancelled() }}" + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + include: + - arrangement: monolith + database: SQLite + + - arrangement: monolith + database: Postgres + + - arrangement: workers + database: Postgres + + steps: + - name: Run actions/checkout@v2 for synapse + uses: actions/checkout@v2 + with: + path: synapse + + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh + + # This step is specific to the 'Twisted trunk' test run: + - name: Patch dependencies + run: | + set -x + DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx + pipx install poetry==1.1.14 + + poetry remove -n twisted + poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk + poetry lock --no-update + # NOT IN 1.1.14 poetry lock --check + working-directory: synapse + + - run: | + set -o pipefail + TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + shell: bash + name: Run Complement Tests + # open an issue if the build fails, so we know about it. open-issue: if: failure() @@ -103,6 +148,7 @@ jobs: - mypy - trial - sytest + - complement runs-on: ubuntu-latest diff --git a/CHANGES.md b/CHANGES.md index bd9b34dd7a..1d123abc19 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,245 @@ +Synapse vNext +============= + +As of this release, Synapse no longer allows the tasks of verifying email address ownership, and password reset confirmation, to be delegated to an identity server. For more information, see the [upgrade notes](https://matrix-org.github.io/synapse/v1.64/upgrade.html#upgrading-to-v1640). + + +Synapse 1.63.1 (2022-07-20) +=========================== + +Bugfixes +-------- + +- Fix a bug introduced in Synapse 1.63.0 where push actions were incorrectly calculated for appservice users. This caused performance issues on servers with large numbers of appservices. ([\#13332](https://github.com/matrix-org/synapse/issues/13332)) + + +Synapse 1.63.0 (2022-07-19) +=========================== + +Improved Documentation +---------------------- + +- Clarify that homeserver server names are included in the reported data when the `report_stats` config option is enabled. ([\#13321](https://github.com/matrix-org/synapse/issues/13321)) + + +Synapse 1.63.0rc1 (2022-07-12) +============================== + +Features +-------- + +- Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) +- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of `/publicRooms` by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) +- Improve validation logic in the account data REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) + + +Bugfixes +-------- + +- Fix a long-standing bug where application services were not able to join remote federated rooms without a profile. ([\#13131](https://github.com/matrix-org/synapse/issues/13131)) +- Fix a long-standing bug where `_get_state_map_for_room` might raise errors when third party event rules callbacks are present. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) +- Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. ([\#13226](https://github.com/matrix-org/synapse/issues/13226)) +- Fix a bug introduced in 1.54.0 where appservices would not receive room-less EDUs, like presence, when both [MSC2409](https://github.com/matrix-org/matrix-spec-proposals/pull/2409) and [MSC3202](https://github.com/matrix-org/matrix-spec-proposals/pull/3202) are enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) +- Fix a bug introduced in 1.62.0 where rows were not deleted from `event_push_actions` table on large servers. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) +- Fix a bug introduced in 1.62.0 where notification counts would get stuck after a highlighted message. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) +- Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) +- Fix [MSC3202](https://github.com/matrix-org/matrix-spec-proposals/pull/3202)-enabled appservices not receiving to-device messages, preventing messages from being decrypted. ([\#13235](https://github.com/matrix-org/synapse/issues/13235)) + + +Updates to the Docker image +--------------------------- + +- Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. ([\#13207](https://github.com/matrix-org/synapse/issues/13207)) + + +Improved Documentation +---------------------- + +- Add an explanation of the `--report-stats` argument to the docs. ([\#13029](https://github.com/matrix-org/synapse/issues/13029)) +- Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. ([\#13032](https://github.com/matrix-org/synapse/issues/13032)) +- Add missing links to config options. ([\#13166](https://github.com/matrix-org/synapse/issues/13166)) +- Add documentation for homeserver usage statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) +- Add documentation for the existing `databases` option in the homeserver configuration manual. ([\#13212](https://github.com/matrix-org/synapse/issues/13212)) +- Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077), [\#13139](https://github.com/matrix-org/synapse/issues/13139)) +- Document how the Synapse team does reviews. ([\#13132](https://github.com/matrix-org/synapse/issues/13132)) +- Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. ([\#13116](https://github.com/matrix-org/synapse/issues/13116)) + + +Deprecations and Removals +------------------------- + +- Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. ([\#13200](https://github.com/matrix-org/synapse/issues/13200)) + + +Internal Changes +---------------- + +- Add type annotations to `synapse.logging`, `tests.server` and `tests.utils`. ([\#13028](https://github.com/matrix-org/synapse/issues/13028), [\#13103](https://github.com/matrix-org/synapse/issues/13103), [\#13159](https://github.com/matrix-org/synapse/issues/13159), [\#13136](https://github.com/matrix-org/synapse/issues/13136)) +- Enforce type annotations for `tests.test_server`. ([\#13135](https://github.com/matrix-org/synapse/issues/13135)) +- Support temporary experimental return values for spam checker module callbacks. ([\#13044](https://github.com/matrix-org/synapse/issues/13044)) +- Add support to `complement.sh` for skipping the docker build. ([\#13143](https://github.com/matrix-org/synapse/issues/13143), [\#13158](https://github.com/matrix-org/synapse/issues/13158)) +- Add support to `complement.sh` for setting the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment variable. ([\#13152](https://github.com/matrix-org/synapse/issues/13152)) +- Enable Complement testing in the 'Twisted Trunk' CI runs. ([\#13079](https://github.com/matrix-org/synapse/issues/13079), [\#13157](https://github.com/matrix-org/synapse/issues/13157)) +- Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. ([\#13127](https://github.com/matrix-org/synapse/issues/13127)) +- Update config used by Complement to allow device name lookup over federation. ([\#13167](https://github.com/matrix-org/synapse/issues/13167)) +- Faster room joins: handle race between persisting an event and un-partial stating a room. ([\#13100](https://github.com/matrix-org/synapse/issues/13100)) +- Faster room joins: fix race in recalculation of current room state. ([\#13151](https://github.com/matrix-org/synapse/issues/13151)) +- Faster room joins: skip waiting for full state when processing incoming events over federation. ([\#13144](https://github.com/matrix-org/synapse/issues/13144)) +- Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. ([\#13113](https://github.com/matrix-org/synapse/issues/13113)) +- Avoid stripping line breaks from SQL sent to the database. ([\#13129](https://github.com/matrix-org/synapse/issues/13129)) +- Apply ratelimiting earlier in processing of `/send` requests. ([\#13134](https://github.com/matrix-org/synapse/issues/13134)) +- Improve exception handling when processing events received over federation. ([\#13145](https://github.com/matrix-org/synapse/issues/13145)) +- Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. ([\#13195](https://github.com/matrix-org/synapse/issues/13195)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. ([\#13119](https://github.com/matrix-org/synapse/issues/13119), [\#13153](https://github.com/matrix-org/synapse/issues/13153)) +- Reduce memory consumption when processing incoming events in large rooms. ([\#13078](https://github.com/matrix-org/synapse/issues/13078), [\#13222](https://github.com/matrix-org/synapse/issues/13222)) +- Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). ([\#13209](https://github.com/matrix-org/synapse/issues/13209)) +- Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). ([\#13210](https://github.com/matrix-org/synapse/issues/13210)) +- More aggressively rotate push actions. ([\#13211](https://github.com/matrix-org/synapse/issues/13211)) +- Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. ([\#13228](https://github.com/matrix-org/synapse/issues/13228)) + +Synapse 1.62.0 (2022-07-05) +=========================== + +No significant changes since 1.62.0rc3. + +Authors of spam-checker plugins should consult the [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.62/docs/upgrade.md#upgrading-to-v1620) to learn about the enriched signatures for spam checker callbacks, which are supported with this release of Synapse. + +Synapse 1.62.0rc3 (2022-07-04) +============================== + +Bugfixes +-------- + +- Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) included in the `matrixdotorg/synapse` DockerHub images and the Debian packages hosted on `packages.matrix.org` to 0.2.1. This fixes [a bug](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163) with usernames containing uppercase characters. ([\#13156](https://github.com/matrix-org/synapse/issues/13156)) +- Fix a bug introduced in Synapse 1.62.0rc1 affecting unread counts for users on small servers. ([\#13168](https://github.com/matrix-org/synapse/issues/13168)) + + +Synapse 1.62.0rc2 (2022-07-01) +============================== + +Bugfixes +-------- + +- Fix unread counts for users on large servers. Introduced in v1.62.0rc1. ([\#13140](https://github.com/matrix-org/synapse/issues/13140)) +- Fix DB performance when deleting old push notifications. Introduced in v1.62.0rc1. ([\#13141](https://github.com/matrix-org/synapse/issues/13141)) + + +Synapse 1.62.0rc1 (2022-06-28) +============================== + +Features +-------- + +- Port the spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. ([\#12857](https://github.com/matrix-org/synapse/issues/12857), [\#13047](https://github.com/matrix-org/synapse/issues/13047)) +- Allow server admins to customise the response of the `/.well-known/matrix/client` endpoint. ([\#13035](https://github.com/matrix-org/synapse/issues/13035)) +- Add metrics measuring the CPU and DB time spent in state resolution. ([\#13036](https://github.com/matrix-org/synapse/issues/13036)) +- Speed up fetching of device list changes in `/sync` and `/keys/changes`. ([\#13045](https://github.com/matrix-org/synapse/issues/13045), [\#13098](https://github.com/matrix-org/synapse/issues/13098)) +- Improve URL previews for sites which only provide Twitter Card metadata, e.g. LWN.net. ([\#13056](https://github.com/matrix-org/synapse/issues/13056)) + + +Bugfixes +-------- + +- Update [MSC3786](https://github.com/matrix-org/matrix-spec-proposals/pull/3786) implementation to check `state_key`. ([\#12939](https://github.com/matrix-org/synapse/issues/12939)) +- Fix a bug introduced in Synapse 1.58 where Synapse would not report full version information when installed from a git checkout. This is a best-effort affair and not guaranteed to be stable. ([\#12973](https://github.com/matrix-org/synapse/issues/12973)) +- Fix a bug introduced in Synapse 1.60 where Synapse would fail to start if the `sqlite3` module was not available. ([\#12979](https://github.com/matrix-org/synapse/issues/12979)) +- Fix a bug where non-standard information was required when requesting the `/hierarchy` API over federation. Introduced + in Synapse v1.41.0. ([\#12991](https://github.com/matrix-org/synapse/issues/12991)) +- Fix a long-standing bug which meant that rate limiting was not restrictive enough in some cases. ([\#13018](https://github.com/matrix-org/synapse/issues/13018)) +- Fix a bug introduced in Synapse 1.58 where profile requests for a malformed user ID would ccause an internal error. Synapse now returns 400 Bad Request in this situation. ([\#13041](https://github.com/matrix-org/synapse/issues/13041)) +- Fix some inconsistencies in the event authentication code. ([\#13087](https://github.com/matrix-org/synapse/issues/13087), [\#13088](https://github.com/matrix-org/synapse/issues/13088)) +- Fix a long-standing bug where room directory requests would cause an internal server error if given a malformed room alias. ([\#13106](https://github.com/matrix-org/synapse/issues/13106)) + + +Improved Documentation +---------------------- + +- Add documentation for how to configure Synapse with Workers using Docker Compose. Includes example worker config and docker-compose.yaml. Contributed by @Thumbscrew. ([\#12737](https://github.com/matrix-org/synapse/issues/12737)) +- Ensure the [Poetry cheat sheet](https://matrix-org.github.io/synapse/develop/development/dependencies.html) is available in the online documentation. ([\#13022](https://github.com/matrix-org/synapse/issues/13022)) +- Mention removed community/group worker endpoints in upgrade.md. Contributed by @olmari. ([\#13023](https://github.com/matrix-org/synapse/issues/13023)) +- Add instructions for running Complement with `gotestfmt`-formatted output locally. ([\#13073](https://github.com/matrix-org/synapse/issues/13073)) +- Update OpenTracing docs to reference the configuration manual rather than the configuration file. ([\#13076](https://github.com/matrix-org/synapse/issues/13076)) +- Update information on downstream Debian packages. ([\#13095](https://github.com/matrix-org/synapse/issues/13095)) +- Remove documentation for the Delete Group Admin API which no longer exists. ([\#13112](https://github.com/matrix-org/synapse/issues/13112)) + + +Deprecations and Removals +------------------------- + +- Remove the unspecced `DELETE /directory/list/room/{roomId}` endpoint, which hid rooms from the [public room directory](https://spec.matrix.org/v1.3/client-server-api/#listing-rooms). Instead, `PUT` to the same URL with a visibility of `"private"`. ([\#13123](https://github.com/matrix-org/synapse/issues/13123)) + + +Internal Changes +---------------- + +- Add tests for cancellation of `GET /rooms/$room_id/members` and `GET /rooms/$room_id/state` requests. ([\#12674](https://github.com/matrix-org/synapse/issues/12674)) +- Report login failures due to unknown third party identifiers in the same way as failures due to invalid passwords. This prevents an attacker from using the error response to determine if the identifier exists. Contributed by Daniel Aloni. ([\#12738](https://github.com/matrix-org/synapse/issues/12738)) +- Merge the Complement testing Docker images into a single, multi-purpose image. ([\#12881](https://github.com/matrix-org/synapse/issues/12881), [\#13075](https://github.com/matrix-org/synapse/issues/13075)) +- Simplify the database schema for `event_edges`. ([\#12893](https://github.com/matrix-org/synapse/issues/12893)) +- Clean up the test code for client disconnection. ([\#12929](https://github.com/matrix-org/synapse/issues/12929)) +- Remove code generating comments in configuration. ([\#12941](https://github.com/matrix-org/synapse/issues/12941)) +- Add `Cross-Origin-Resource-Policy: cross-origin` header to content repository's thumbnail and download endpoints. ([\#12944](https://github.com/matrix-org/synapse/issues/12944)) +- Replace noop background updates with `DELETE` delta. ([\#12954](https://github.com/matrix-org/synapse/issues/12954), [\#13050](https://github.com/matrix-org/synapse/issues/13050)) +- Use lower isolation level when inserting read receipts to avoid serialization errors. Contributed by Nick @ Beeper. ([\#12957](https://github.com/matrix-org/synapse/issues/12957)) +- Reduce the amount of state we pull from the DB. ([\#12963](https://github.com/matrix-org/synapse/issues/12963)) +- Enable testing against PostgreSQL databases in Complement CI. ([\#12965](https://github.com/matrix-org/synapse/issues/12965), [\#13034](https://github.com/matrix-org/synapse/issues/13034)) +- Fix an inaccurate comment. ([\#12969](https://github.com/matrix-org/synapse/issues/12969)) +- Remove the `delete_device` method and always call `delete_devices`. ([\#12970](https://github.com/matrix-org/synapse/issues/12970)) +- Use a GitHub form for issues rather than a hard-to-read, easy-to-ignore template. ([\#12982](https://github.com/matrix-org/synapse/issues/12982)) +- Move [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) behind an experimental config flag. ([\#12984](https://github.com/matrix-org/synapse/issues/12984)) +- Add type hints to tests. ([\#12985](https://github.com/matrix-org/synapse/issues/12985), [\#13099](https://github.com/matrix-org/synapse/issues/13099)) +- Refactor macaroon tokens generation and move the unsubscribe link in notification emails to `/_synapse/client/unsubscribe`. ([\#12986](https://github.com/matrix-org/synapse/issues/12986)) +- Fix documentation for running complement tests. ([\#12990](https://github.com/matrix-org/synapse/issues/12990)) +- Faster joins: add issue links to the TODO comments in the code. ([\#13004](https://github.com/matrix-org/synapse/issues/13004)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. ([\#13005](https://github.com/matrix-org/synapse/issues/13005), [\#13096](https://github.com/matrix-org/synapse/issues/13096), [\#13118](https://github.com/matrix-org/synapse/issues/13118)) +- Replaced usage of PyJWT with methods from Authlib in `org.matrix.login.jwt`. Contributed by Hannes Lerchl. ([\#13011](https://github.com/matrix-org/synapse/issues/13011)) +- Modernize the `contrib/graph/` scripts. ([\#13013](https://github.com/matrix-org/synapse/issues/13013)) +- Remove redundant `room_version` parameters from event auth functions. ([\#13017](https://github.com/matrix-org/synapse/issues/13017)) +- Decouple `synapse.api.auth_blocking.AuthBlocking` from `synapse.api.auth.Auth`. ([\#13021](https://github.com/matrix-org/synapse/issues/13021)) +- Add type annotations to `synapse.storage.databases.main.devices`. ([\#13025](https://github.com/matrix-org/synapse/issues/13025)) +- Set default `sync_response_cache_duration` to two minutes. ([\#13042](https://github.com/matrix-org/synapse/issues/13042)) +- Rename CI test runs. ([\#13046](https://github.com/matrix-org/synapse/issues/13046)) +- Increase timeout of complement CI test runs. ([\#13048](https://github.com/matrix-org/synapse/issues/13048)) +- Refactor entry points so that they all have a `main` function. ([\#13052](https://github.com/matrix-org/synapse/issues/13052)) +- Refactor the Dockerfile-workers configuration script to use Jinja2 templates in Synapse workers' Supervisord blocks. ([\#13054](https://github.com/matrix-org/synapse/issues/13054)) +- Add headers to individual options in config documentation to allow for linking. ([\#13055](https://github.com/matrix-org/synapse/issues/13055)) +- Make Complement CI logs easier to read. ([\#13057](https://github.com/matrix-org/synapse/issues/13057), [\#13058](https://github.com/matrix-org/synapse/issues/13058), [\#13069](https://github.com/matrix-org/synapse/issues/13069)) +- Don't instantiate modules with keyword arguments. ([\#13060](https://github.com/matrix-org/synapse/issues/13060)) +- Fix type checking errors against Twisted trunk. ([\#13061](https://github.com/matrix-org/synapse/issues/13061)) +- Allow MSC3030 `timestamp_to_event` calls from anyone on world-readable rooms. ([\#13062](https://github.com/matrix-org/synapse/issues/13062)) +- Add a CI job to check that schema deltas are in the correct folder. ([\#13063](https://github.com/matrix-org/synapse/issues/13063)) +- Avoid rechecking event auth rules which are independent of room state. ([\#13065](https://github.com/matrix-org/synapse/issues/13065)) +- Reduce the duplication of code that invokes the rate limiter. ([\#13070](https://github.com/matrix-org/synapse/issues/13070)) +- Add a Subject Alternative Name to the certificate generated for Complement tests. ([\#13071](https://github.com/matrix-org/synapse/issues/13071)) +- Add more tests for room upgrades. ([\#13074](https://github.com/matrix-org/synapse/issues/13074)) +- Pin dependencies maintained by matrix.org to [semantic version](https://semver.org/) bounds. ([\#13082](https://github.com/matrix-org/synapse/issues/13082)) +- Correctly report prometheus DB stats for `get_earliest_token_for_stats`. ([\#13085](https://github.com/matrix-org/synapse/issues/13085)) +- Fix a long-standing bug where a finished logging context would be re-started when Synapse failed to persist an event from federation. ([\#13089](https://github.com/matrix-org/synapse/issues/13089)) +- Simplify the alias deletion logic as an application service. ([\#13093](https://github.com/matrix-org/synapse/issues/13093)) +- Add type annotations to `tests.test_server`. ([\#13124](https://github.com/matrix-org/synapse/issues/13124)) + + +Synapse 1.61.1 (2022-06-28) +=========================== + +This patch release fixes a security issue regarding URL previews, affecting all prior versions of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild. + +Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below. + +## Security advisory + +The following issue is fixed in 1.61.1. + +* [GHSA-22p3-qrh9-cx32](https://github.com/matrix-org/synapse/security/advisories/GHSA-22p3-qrh9-cx32) / [CVE-2022-31052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-31052) + + Synapse instances with the [`url_preview_enabled`](https://matrix-org.github.io/synapse/v1.61/usage/configuration/config_documentation.html#media-store) homeserver config option set to `true` are affected. URL previews of some web pages can lead to unbounded recursion, causing the request to either fail, or in some cases crash the running Synapse process. + + Requesting URL previews requires authentication. Nevertheless, it is possible to exploit this maliciously, either by malicious users on the homeserver, or by remote users sending URLs that a local user's client may automatically request a URL preview for. + + Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality. + + Fixed by [fa1308061802ac7b7d20e954ba7372c5ac292333](https://github.com/matrix-org/synapse/commit/fa1308061802ac7b7d20e954ba7372c5ac292333). + Synapse 1.61.0 (2022-06-14) =========================== diff --git a/changelog.d/12674.misc b/changelog.d/12674.misc deleted file mode 100644 index c8a8f32f0a..0000000000 --- a/changelog.d/12674.misc +++ /dev/null @@ -1 +0,0 @@ -Add tests for cancellation of `GET /rooms/$room_id/members` and `GET /rooms/$room_id/state` requests. diff --git a/changelog.d/12737.doc b/changelog.d/12737.doc deleted file mode 100644 index ab2d1f2fd9..0000000000 --- a/changelog.d/12737.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for how to configure Synapse with Workers using Docker Compose. Includes example worker config and docker-compose.yaml. Contributed by @Thumbscrew. \ No newline at end of file diff --git a/changelog.d/12738.misc b/changelog.d/12738.misc deleted file mode 100644 index 8252223475..0000000000 --- a/changelog.d/12738.misc +++ /dev/null @@ -1 +0,0 @@ -Report login failures due to unknown third party identifiers in the same way as failures due to invalid passwords. This prevents an attacker from using the error response to determine if the identifier exists. Contributed by Daniel Aloni. \ No newline at end of file diff --git a/changelog.d/12857.feature b/changelog.d/12857.feature deleted file mode 100644 index ddd1dbe685..0000000000 --- a/changelog.d/12857.feature +++ /dev/null @@ -1 +0,0 @@ -Port spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. diff --git a/changelog.d/12881.misc b/changelog.d/12881.misc deleted file mode 100644 index 8a83182bd4..0000000000 --- a/changelog.d/12881.misc +++ /dev/null @@ -1 +0,0 @@ -Merge the Complement testing Docker images into a single, multi-purpose image. \ No newline at end of file diff --git a/changelog.d/12893.misc b/changelog.d/12893.misc deleted file mode 100644 index 5705210303..0000000000 --- a/changelog.d/12893.misc +++ /dev/null @@ -1 +0,0 @@ -Simplify the database schema for `event_edges`. diff --git a/changelog.d/12929.misc b/changelog.d/12929.misc deleted file mode 100644 index 20718d258d..0000000000 --- a/changelog.d/12929.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up the test code for client disconnection. diff --git a/changelog.d/12941.misc b/changelog.d/12941.misc deleted file mode 100644 index 6a74f255df..0000000000 --- a/changelog.d/12941.misc +++ /dev/null @@ -1 +0,0 @@ -Remove code generating comments in configuration. diff --git a/changelog.d/12942.misc b/changelog.d/12942.misc new file mode 100644 index 0000000000..acb2558d57 --- /dev/null +++ b/changelog.d/12942.misc @@ -0,0 +1 @@ +Use lower isolation level when purging rooms to avoid serialization errors. Contributed by Nick @ Beeper. diff --git a/changelog.d/12943.misc b/changelog.d/12943.misc new file mode 100644 index 0000000000..f66bb3ec32 --- /dev/null +++ b/changelog.d/12943.misc @@ -0,0 +1 @@ +Remove code which incorrectly attempted to reconcile state with remote servers when processing incoming events. diff --git a/changelog.d/12954.misc b/changelog.d/12954.misc deleted file mode 100644 index 20bf136732..0000000000 --- a/changelog.d/12954.misc +++ /dev/null @@ -1 +0,0 @@ -Replace noop background updates with `DELETE` delta. diff --git a/changelog.d/12957.misc b/changelog.d/12957.misc deleted file mode 100644 index 0c075276ec..0000000000 --- a/changelog.d/12957.misc +++ /dev/null @@ -1 +0,0 @@ -Use lower isolation level when inserting read receipts to avoid serialization errors. Contributed by Nick @ Beeper. diff --git a/changelog.d/12963.misc b/changelog.d/12963.misc deleted file mode 100644 index d57e1aca6b..0000000000 --- a/changelog.d/12963.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce the amount of state we pull from the DB. diff --git a/changelog.d/12965.misc b/changelog.d/12965.misc deleted file mode 100644 index cc2823e12b..0000000000 --- a/changelog.d/12965.misc +++ /dev/null @@ -1 +0,0 @@ -Enable testing against PostgreSQL databases in Complement CI. \ No newline at end of file diff --git a/changelog.d/12967.removal b/changelog.d/12967.removal new file mode 100644 index 0000000000..0aafd6a4d9 --- /dev/null +++ b/changelog.d/12967.removal @@ -0,0 +1 @@ +Drop tables used for groups/communities. diff --git a/changelog.d/12969.misc b/changelog.d/12969.misc deleted file mode 100644 index 05de7ce839..0000000000 --- a/changelog.d/12969.misc +++ /dev/null @@ -1 +0,0 @@ -Fix an inaccurate comment. diff --git a/changelog.d/12970.misc b/changelog.d/12970.misc deleted file mode 100644 index 8f874aa07b..0000000000 --- a/changelog.d/12970.misc +++ /dev/null @@ -1 +0,0 @@ -Remove the `delete_device` method and always call `delete_devices`. diff --git a/changelog.d/12973.bugfix b/changelog.d/12973.bugfix deleted file mode 100644 index 1bf45854ff..0000000000 --- a/changelog.d/12973.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.58 where Synapse would not report full version information when installed from a git checkout. This is a best-effort affair and not guaranteed to be stable. diff --git a/changelog.d/12979.bugfix b/changelog.d/12979.bugfix deleted file mode 100644 index 6b54408025..0000000000 --- a/changelog.d/12979.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.60 where Synapse would fail to start if the `sqlite3` module was not available. diff --git a/changelog.d/12982.misc b/changelog.d/12982.misc deleted file mode 100644 index 036b69efe6..0000000000 --- a/changelog.d/12982.misc +++ /dev/null @@ -1 +0,0 @@ -Use a GitHub form for issues rather than a hard-to-read, easy-to-ignore template. \ No newline at end of file diff --git a/changelog.d/12984.misc b/changelog.d/12984.misc deleted file mode 100644 index a902017180..0000000000 --- a/changelog.d/12984.misc +++ /dev/null @@ -1 +0,0 @@ -Move [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) behind an experimental config flag. diff --git a/changelog.d/12985.misc b/changelog.d/12985.misc deleted file mode 100644 index d5ab9eedea..0000000000 --- a/changelog.d/12985.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.state.test_v2`. diff --git a/changelog.d/12986.misc b/changelog.d/12986.misc deleted file mode 100644 index 937b888023..0000000000 --- a/changelog.d/12986.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor macaroon tokens generation and move the unsubscribe link in notification emails to `/_synapse/client/unsubscribe`. diff --git a/changelog.d/12990.misc b/changelog.d/12990.misc deleted file mode 100644 index c68f6a731e..0000000000 --- a/changelog.d/12990.misc +++ /dev/null @@ -1 +0,0 @@ -Fix documentation for running complement tests. diff --git a/changelog.d/12991.bugfix b/changelog.d/12991.bugfix deleted file mode 100644 index c6e388d5b9..0000000000 --- a/changelog.d/12991.bugfix +++ /dev/null @@ -1,2 +0,0 @@ -Fix a bug where non-standard information was required when requesting the `/hierarchy` API over federation. Introduced -in Synapse v1.41.0. diff --git a/changelog.d/13004.misc b/changelog.d/13004.misc deleted file mode 100644 index d8e93d87af..0000000000 --- a/changelog.d/13004.misc +++ /dev/null @@ -1 +0,0 @@ -Faster joins: add issue links to the TODO comments in the code. diff --git a/changelog.d/13005.misc b/changelog.d/13005.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13005.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13013.misc b/changelog.d/13013.misc deleted file mode 100644 index 903c6a3c8a..0000000000 --- a/changelog.d/13013.misc +++ /dev/null @@ -1 +0,0 @@ -Modernize the `contrib/graph/` scripts. diff --git a/changelog.d/13017.misc b/changelog.d/13017.misc deleted file mode 100644 index b314687f9c..0000000000 --- a/changelog.d/13017.misc +++ /dev/null @@ -1 +0,0 @@ -Remove redundant `room_version` parameters from event auth functions. diff --git a/changelog.d/13018.bugfix b/changelog.d/13018.bugfix deleted file mode 100644 index a84657f04f..0000000000 --- a/changelog.d/13018.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug which meant that rate limiting was not restrictive enough in some cases. \ No newline at end of file diff --git a/changelog.d/13021.misc b/changelog.d/13021.misc deleted file mode 100644 index 84c41cdf59..0000000000 --- a/changelog.d/13021.misc +++ /dev/null @@ -1 +0,0 @@ -Decouple `synapse.api.auth_blocking.AuthBlocking` from `synapse.api.auth.Auth`. diff --git a/changelog.d/13022.doc b/changelog.d/13022.doc deleted file mode 100644 index 4d6ac7ae94..0000000000 --- a/changelog.d/13022.doc +++ /dev/null @@ -1 +0,0 @@ -Ensure the [Poetry cheat sheet](https://matrix-org.github.io/synapse/develop/development/dependencies.html) is available in the online documentation. diff --git a/changelog.d/13023.doc b/changelog.d/13023.doc deleted file mode 100644 index 5589c7492c..0000000000 --- a/changelog.d/13023.doc +++ /dev/null @@ -1 +0,0 @@ -Mention removed community/group worker endpoints in upgrade.md. Contributed by @olmari. \ No newline at end of file diff --git a/changelog.d/13025.misc b/changelog.d/13025.misc deleted file mode 100644 index 7cb0d174b7..0000000000 --- a/changelog.d/13025.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `synapse.storage.databases.main.devices`. diff --git a/changelog.d/13034.misc b/changelog.d/13034.misc deleted file mode 100644 index cc2823e12b..0000000000 --- a/changelog.d/13034.misc +++ /dev/null @@ -1 +0,0 @@ -Enable testing against PostgreSQL databases in Complement CI. \ No newline at end of file diff --git a/changelog.d/13036.feature b/changelog.d/13036.feature deleted file mode 100644 index 71e5a29fe9..0000000000 --- a/changelog.d/13036.feature +++ /dev/null @@ -1 +0,0 @@ -Add metrics measuring the CPU and DB time spent in state resolution. diff --git a/changelog.d/13038.feature b/changelog.d/13038.feature new file mode 100644 index 0000000000..1278f1b4e9 --- /dev/null +++ b/changelog.d/13038.feature @@ -0,0 +1 @@ +Provide more info why we don't have any thumbnails to serve. diff --git a/changelog.d/13041.bugfix b/changelog.d/13041.bugfix deleted file mode 100644 index edb1635eb9..0000000000 --- a/changelog.d/13041.bugfix +++ /dev/null @@ -1,2 +0,0 @@ -Fix a bug introduced in Synapse 1.58 where profile requests for a malformed user ID would ccause an internal error. Synapse now returns 400 Bad Request in this situation. - diff --git a/changelog.d/13046.misc b/changelog.d/13046.misc deleted file mode 100644 index 1248c34d39..0000000000 --- a/changelog.d/13046.misc +++ /dev/null @@ -1 +0,0 @@ -Rename CI test runs. diff --git a/changelog.d/13047.feature b/changelog.d/13047.feature deleted file mode 100644 index ddd1dbe685..0000000000 --- a/changelog.d/13047.feature +++ /dev/null @@ -1 +0,0 @@ -Port spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. diff --git a/changelog.d/13048.misc b/changelog.d/13048.misc deleted file mode 100644 index 073c8b1a96..0000000000 --- a/changelog.d/13048.misc +++ /dev/null @@ -1 +0,0 @@ -Increase timeout of complement CI test runs. diff --git a/changelog.d/13050.misc b/changelog.d/13050.misc deleted file mode 100644 index 20bf136732..0000000000 --- a/changelog.d/13050.misc +++ /dev/null @@ -1 +0,0 @@ -Replace noop background updates with `DELETE` delta. diff --git a/changelog.d/13052.misc b/changelog.d/13052.misc deleted file mode 100644 index 0d11dfb12a..0000000000 --- a/changelog.d/13052.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor entry points so that they all have a `main` function. \ No newline at end of file diff --git a/changelog.d/13057.misc b/changelog.d/13057.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13057.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13058.misc b/changelog.d/13058.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13058.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13060.misc b/changelog.d/13060.misc deleted file mode 100644 index c2376701f4..0000000000 --- a/changelog.d/13060.misc +++ /dev/null @@ -1 +0,0 @@ -Don't instantiate modules with keyword arguments. diff --git a/changelog.d/13061.misc b/changelog.d/13061.misc deleted file mode 100644 index 4c55e2b4ed..0000000000 --- a/changelog.d/13061.misc +++ /dev/null @@ -1 +0,0 @@ -Fix type checking errors against Twisted trunk. diff --git a/changelog.d/13063.misc b/changelog.d/13063.misc deleted file mode 100644 index 167d6d2cd5..0000000000 --- a/changelog.d/13063.misc +++ /dev/null @@ -1 +0,0 @@ -Add a CI job to check that schema deltas are in the correct folder. diff --git a/changelog.d/13069.misc b/changelog.d/13069.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13069.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13094.misc b/changelog.d/13094.misc new file mode 100644 index 0000000000..f1e55ae476 --- /dev/null +++ b/changelog.d/13094.misc @@ -0,0 +1 @@ +Make the AS login method call `Auth.get_user_by_req` for checking the AS token. diff --git a/changelog.d/13172.misc b/changelog.d/13172.misc new file mode 100644 index 0000000000..124a1b3662 --- /dev/null +++ b/changelog.d/13172.misc @@ -0,0 +1 @@ +Always use a version of canonicaljson that supports the C implementation of frozendict. diff --git a/changelog.d/13175.misc b/changelog.d/13175.misc new file mode 100644 index 0000000000..f273b3d6ca --- /dev/null +++ b/changelog.d/13175.misc @@ -0,0 +1 @@ +Add prometheus counters for ephemeral events and to device messages pushed to app services. Contributed by Brad @ Beeper. diff --git a/changelog.d/13192.removal b/changelog.d/13192.removal new file mode 100644 index 0000000000..a7dffd1c48 --- /dev/null +++ b/changelog.d/13192.removal @@ -0,0 +1 @@ +Drop support for delegating email verification to an external server. diff --git a/changelog.d/13198.misc b/changelog.d/13198.misc new file mode 100644 index 0000000000..5aef2432df --- /dev/null +++ b/changelog.d/13198.misc @@ -0,0 +1 @@ +Refactor receipts servlet logic to avoid duplicated code. diff --git a/changelog.d/13205.feature b/changelog.d/13205.feature new file mode 100644 index 0000000000..d89aa9aa75 --- /dev/null +++ b/changelog.d/13205.feature @@ -0,0 +1 @@ +Allow pagination from remote event after discovering it from MSC3030 `/timestamp_to_event`. diff --git a/changelog.d/13208.feature b/changelog.d/13208.feature new file mode 100644 index 0000000000..b0c5f090ee --- /dev/null +++ b/changelog.d/13208.feature @@ -0,0 +1 @@ +Add a `room_type` field in the responses for the list room and room details admin API. Contributed by @andrewdoh. \ No newline at end of file diff --git a/changelog.d/13215.misc b/changelog.d/13215.misc new file mode 100644 index 0000000000..3da35addb3 --- /dev/null +++ b/changelog.d/13215.misc @@ -0,0 +1 @@ +Preparation for database schema simplifications: populate `state_key` and `rejection_reason` for existing rows in the `events` table. diff --git a/changelog.d/13218.misc b/changelog.d/13218.misc new file mode 100644 index 0000000000..b1c8e5c747 --- /dev/null +++ b/changelog.d/13218.misc @@ -0,0 +1 @@ +Remove unused database table `event_reference_hashes`. diff --git a/changelog.d/13220.feature b/changelog.d/13220.feature new file mode 100644 index 0000000000..9b0240fdc8 --- /dev/null +++ b/changelog.d/13220.feature @@ -0,0 +1 @@ +Add support for room version 10. diff --git a/changelog.d/13224.misc b/changelog.d/13224.misc new file mode 100644 index 0000000000..41f8693b74 --- /dev/null +++ b/changelog.d/13224.misc @@ -0,0 +1 @@ +Further reduce queries used sending events when creating new rooms. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13231.doc b/changelog.d/13231.doc new file mode 100644 index 0000000000..e750f9da49 --- /dev/null +++ b/changelog.d/13231.doc @@ -0,0 +1 @@ +Provide an example of using the Admin API. Contributed by @jejo86. diff --git a/changelog.d/13233.doc b/changelog.d/13233.doc new file mode 100644 index 0000000000..3eaea7c5e3 --- /dev/null +++ b/changelog.d/13233.doc @@ -0,0 +1 @@ +Move the documentation for how URL previews work to the URL preview module. diff --git a/changelog.d/13239.removal b/changelog.d/13239.removal new file mode 100644 index 0000000000..8f6045176d --- /dev/null +++ b/changelog.d/13239.removal @@ -0,0 +1 @@ +Drop support for calling `/_matrix/client/v3/account/3pid/bind` without an `id_access_token`, which was not permitted by the spec. Contributed by @Vetchu. \ No newline at end of file diff --git a/changelog.d/13240.misc b/changelog.d/13240.misc new file mode 100644 index 0000000000..0567e47d64 --- /dev/null +++ b/changelog.d/13240.misc @@ -0,0 +1 @@ +Call the v2 identity service `/3pid/unbind` endpoint, rather than v1. \ No newline at end of file diff --git a/changelog.d/13242.misc b/changelog.d/13242.misc new file mode 100644 index 0000000000..7f8ec0815f --- /dev/null +++ b/changelog.d/13242.misc @@ -0,0 +1 @@ +Use an asynchronous cache wrapper for the get event cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13251.misc b/changelog.d/13251.misc new file mode 100644 index 0000000000..526369e403 --- /dev/null +++ b/changelog.d/13251.misc @@ -0,0 +1 @@ +Optimise federation sender and appservice pusher event stream processing queries. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13253.misc b/changelog.d/13253.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13253.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. diff --git a/changelog.d/13254.misc b/changelog.d/13254.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13254.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. diff --git a/changelog.d/13255.misc b/changelog.d/13255.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13255.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. diff --git a/changelog.d/13257.misc b/changelog.d/13257.misc new file mode 100644 index 0000000000..5fc1388520 --- /dev/null +++ b/changelog.d/13257.misc @@ -0,0 +1 @@ +Log the stack when waiting for an entire room to be un-partial stated. diff --git a/changelog.d/13258.misc b/changelog.d/13258.misc new file mode 100644 index 0000000000..a187c46aa6 --- /dev/null +++ b/changelog.d/13258.misc @@ -0,0 +1 @@ +Fix spurious warning when fetching state after a missing prev event. diff --git a/changelog.d/13260.misc b/changelog.d/13260.misc new file mode 100644 index 0000000000..b55ff32c76 --- /dev/null +++ b/changelog.d/13260.misc @@ -0,0 +1 @@ +Clean-up tests for notifications. diff --git a/changelog.d/13261.doc b/changelog.d/13261.doc new file mode 100644 index 0000000000..3eaea7c5e3 --- /dev/null +++ b/changelog.d/13261.doc @@ -0,0 +1 @@ +Move the documentation for how URL previews work to the URL preview module. diff --git a/changelog.d/13263.bugfix b/changelog.d/13263.bugfix new file mode 100644 index 0000000000..91e1d1e7eb --- /dev/null +++ b/changelog.d/13263.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.15.0 where adding a user through the Synapse Admin API with a phone number would fail if the "enable_email_notifs" and "email_notifs_for_new_users" options were enabled. Contributed by @thomasweston12. diff --git a/changelog.d/13266.misc b/changelog.d/13266.misc new file mode 100644 index 0000000000..d583acb81b --- /dev/null +++ b/changelog.d/13266.misc @@ -0,0 +1 @@ +Do not fail build if complement with workers fails. diff --git a/changelog.d/13267.misc b/changelog.d/13267.misc new file mode 100644 index 0000000000..a334414320 --- /dev/null +++ b/changelog.d/13267.misc @@ -0,0 +1 @@ +Don't pull out state in `compute_event_context` for unconflicted state. diff --git a/changelog.d/13270.bugfix b/changelog.d/13270.bugfix new file mode 100644 index 0000000000..d023b25eea --- /dev/null +++ b/changelog.d/13270.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.40 where a user invited to a restricted room would be briefly unable to join. diff --git a/changelog.d/13271.doc b/changelog.d/13271.doc new file mode 100644 index 0000000000..b50e60d029 --- /dev/null +++ b/changelog.d/13271.doc @@ -0,0 +1 @@ +Add another `contrib` script to help set up worker processes. Contributed by @villepeh. diff --git a/changelog.d/13274.misc b/changelog.d/13274.misc new file mode 100644 index 0000000000..a334414320 --- /dev/null +++ b/changelog.d/13274.misc @@ -0,0 +1 @@ +Don't pull out state in `compute_event_context` for unconflicted state. diff --git a/changelog.d/13276.feature b/changelog.d/13276.feature new file mode 100644 index 0000000000..068d158ed5 --- /dev/null +++ b/changelog.d/13276.feature @@ -0,0 +1 @@ +Add per-room rate limiting for room joins. For each room, Synapse now monitors the rate of join events in that room, and throttle additional joins if that rate grows too large. diff --git a/changelog.d/13278.bugfix b/changelog.d/13278.bugfix new file mode 100644 index 0000000000..49e9377c79 --- /dev/null +++ b/changelog.d/13278.bugfix @@ -0,0 +1 @@ +Fix long-standing bug where in rare instances Synapse could store the incorrect state for a room after a state resolution. diff --git a/changelog.d/13279.misc b/changelog.d/13279.misc new file mode 100644 index 0000000000..a083d2af2a --- /dev/null +++ b/changelog.d/13279.misc @@ -0,0 +1 @@ +Reduce the rebuild time for the complement-synapse docker image. diff --git a/changelog.d/13281.misc b/changelog.d/13281.misc new file mode 100644 index 0000000000..dea51d1362 --- /dev/null +++ b/changelog.d/13281.misc @@ -0,0 +1 @@ +Don't pull out the full state when creating an event. diff --git a/changelog.d/13284.misc b/changelog.d/13284.misc new file mode 100644 index 0000000000..fa9743a10e --- /dev/null +++ b/changelog.d/13284.misc @@ -0,0 +1 @@ +Update locked version of `frozendict` to 2.3.2, which has a fix for a memory leak. diff --git a/changelog.d/13285.misc b/changelog.d/13285.misc new file mode 100644 index 0000000000..b7bcbadb5b --- /dev/null +++ b/changelog.d/13285.misc @@ -0,0 +1 @@ +Upgrade from Poetry 1.1.14 to 1.1.12, to fix bugs when locking packages. diff --git a/changelog.d/13292.misc b/changelog.d/13292.misc new file mode 100644 index 0000000000..67fec55330 --- /dev/null +++ b/changelog.d/13292.misc @@ -0,0 +1 @@ +Make `DictionaryCache` expire full entries if they haven't been queried in a while, even if specific keys have been queried recently. diff --git a/changelog.d/13296.bugfix b/changelog.d/13296.bugfix new file mode 100644 index 0000000000..ff0eb2b4a1 --- /dev/null +++ b/changelog.d/13296.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.18.0 where the `synapse_pushers` metric would overcount pushers when they are replaced. diff --git a/changelog.d/13297.misc b/changelog.d/13297.misc new file mode 100644 index 0000000000..545a62369f --- /dev/null +++ b/changelog.d/13297.misc @@ -0,0 +1 @@ +Use `HTTPStatus` constants in place of literals in tests. \ No newline at end of file diff --git a/changelog.d/13299.misc b/changelog.d/13299.misc new file mode 100644 index 0000000000..a9d5566873 --- /dev/null +++ b/changelog.d/13299.misc @@ -0,0 +1 @@ +Improve performance of query `_get_subset_users_in_room_with_profiles`. diff --git a/changelog.d/13300.misc b/changelog.d/13300.misc new file mode 100644 index 0000000000..ee58add3c4 --- /dev/null +++ b/changelog.d/13300.misc @@ -0,0 +1 @@ +Up batch size of `bulk_get_push_rules` and `_get_joined_profiles_from_event_ids`. diff --git a/changelog.d/13303.misc b/changelog.d/13303.misc new file mode 100644 index 0000000000..03f64ab171 --- /dev/null +++ b/changelog.d/13303.misc @@ -0,0 +1 @@ +Remove unnecessary `json.dumps` from tests. \ No newline at end of file diff --git a/changelog.d/13307.misc b/changelog.d/13307.misc new file mode 100644 index 0000000000..45b628ce13 --- /dev/null +++ b/changelog.d/13307.misc @@ -0,0 +1 @@ +Don't pull out the full state when creating an event. \ No newline at end of file diff --git a/changelog.d/13308.misc b/changelog.d/13308.misc new file mode 100644 index 0000000000..7f8ec0815f --- /dev/null +++ b/changelog.d/13308.misc @@ -0,0 +1 @@ +Use an asynchronous cache wrapper for the get event cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13310.misc b/changelog.d/13310.misc new file mode 100644 index 0000000000..eaf570e058 --- /dev/null +++ b/changelog.d/13310.misc @@ -0,0 +1 @@ +Reduce memory usage of sending dummy events. diff --git a/changelog.d/13311.misc b/changelog.d/13311.misc new file mode 100644 index 0000000000..4be81c675c --- /dev/null +++ b/changelog.d/13311.misc @@ -0,0 +1 @@ +Prevent formatting changes of [#3679](https://github.com/matrix-org/synapse/pull/3679) from appearing in `git blame`. \ No newline at end of file diff --git a/changelog.d/13313.misc b/changelog.d/13313.misc new file mode 100644 index 0000000000..0f3c1f0afd --- /dev/null +++ b/changelog.d/13313.misc @@ -0,0 +1 @@ +Change `get_users_in_room` and `get_rooms_for_user` caches to enable pruning of old entries. diff --git a/changelog.d/13314.doc b/changelog.d/13314.doc new file mode 100644 index 0000000000..75c71ef27a --- /dev/null +++ b/changelog.d/13314.doc @@ -0,0 +1 @@ +Add notes when config options where changed. Contributed by @behrmann. diff --git a/changelog.d/13318.misc b/changelog.d/13318.misc new file mode 100644 index 0000000000..f5cd26b862 --- /dev/null +++ b/changelog.d/13318.misc @@ -0,0 +1 @@ +Validate federation destinations and log an error if a destination is invalid. diff --git a/changelog.d/13320.misc b/changelog.d/13320.misc new file mode 100644 index 0000000000..d33cf3a25a --- /dev/null +++ b/changelog.d/13320.misc @@ -0,0 +1 @@ +Fix `FederationClient.get_pdu()` returning events from the cache as `outliers` instead of original events we saw over federation. diff --git a/changelog.d/13323.misc b/changelog.d/13323.misc new file mode 100644 index 0000000000..3caa94a2f6 --- /dev/null +++ b/changelog.d/13323.misc @@ -0,0 +1 @@ +Reduce memory usage of state caches. diff --git a/changelog.d/13324.misc b/changelog.d/13324.misc new file mode 100644 index 0000000000..30670cf56c --- /dev/null +++ b/changelog.d/13324.misc @@ -0,0 +1 @@ +Reduce the amount of state we store in the `state_cache`. diff --git a/changelog.d/13326.removal b/changelog.d/13326.removal new file mode 100644 index 0000000000..8112286671 --- /dev/null +++ b/changelog.d/13326.removal @@ -0,0 +1 @@ +Stop builindg `.deb` packages for Ubuntu 21.10 (Impish Indri), which has reached end of life. diff --git a/changelog.d/13328.misc b/changelog.d/13328.misc new file mode 100644 index 0000000000..c80578ce95 --- /dev/null +++ b/changelog.d/13328.misc @@ -0,0 +1 @@ +Add missing type hints to open tracing module. diff --git a/changelog.d/13329.misc b/changelog.d/13329.misc new file mode 100644 index 0000000000..4df9a9f6d7 --- /dev/null +++ b/changelog.d/13329.misc @@ -0,0 +1 @@ +Remove old base slaved store and de-duplicate cache ID generators. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13333.doc b/changelog.d/13333.doc new file mode 100644 index 0000000000..57cbdf05c8 --- /dev/null +++ b/changelog.d/13333.doc @@ -0,0 +1 @@ +Document the new `rc_invites.per_issuer` throttling option added in Synapse 1.63. \ No newline at end of file diff --git a/changelog.d/13338.doc b/changelog.d/13338.doc new file mode 100644 index 0000000000..7acf6d3f34 --- /dev/null +++ b/changelog.d/13338.doc @@ -0,0 +1 @@ +Mention that BuildKit is needed when building Docker images for tests. diff --git a/changelog.d/13342.misc b/changelog.d/13342.misc new file mode 100644 index 0000000000..ce9c816b9c --- /dev/null +++ b/changelog.d/13342.misc @@ -0,0 +1 @@ +When reporting metrics is enabled, use ~8x less data to describe DB transaction metrics. diff --git a/changelog.d/13345.misc b/changelog.d/13345.misc new file mode 100644 index 0000000000..c80578ce95 --- /dev/null +++ b/changelog.d/13345.misc @@ -0,0 +1 @@ +Add missing type hints to open tracing module. diff --git a/changelog.d/13349.misc b/changelog.d/13349.misc new file mode 100644 index 0000000000..4df9a9f6d7 --- /dev/null +++ b/changelog.d/13349.misc @@ -0,0 +1 @@ +Remove old base slaved store and de-duplicate cache ID generators. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13352.bugfix b/changelog.d/13352.bugfix new file mode 100644 index 0000000000..8128714299 --- /dev/null +++ b/changelog.d/13352.bugfix @@ -0,0 +1 @@ +Update locked version of `frozendict` to 2.3.3, which has fixes for memory leaks affecting `/sync`. diff --git a/changelog.d/13354.misc b/changelog.d/13354.misc new file mode 100644 index 0000000000..e08ee7866a --- /dev/null +++ b/changelog.d/13354.misc @@ -0,0 +1 @@ +Faster room joins: skip soft fail checks while Synapse only has partial room state, since the current membership of event senders may not be accurately known. diff --git a/changelog.d/13362.misc b/changelog.d/13362.misc new file mode 100644 index 0000000000..c80578ce95 --- /dev/null +++ b/changelog.d/13362.misc @@ -0,0 +1 @@ +Add missing type hints to open tracing module. diff --git a/contrib/workers-bash-scripts/create-multiple-generic-workers.md b/contrib/workers-bash-scripts/create-multiple-generic-workers.md new file mode 100644 index 0000000000..d303101429 --- /dev/null +++ b/contrib/workers-bash-scripts/create-multiple-generic-workers.md @@ -0,0 +1,31 @@ +# Creating multiple generic workers with a bash script + +Setting up multiple worker configuration files manually can be time-consuming. +You can alternatively create multiple worker configuration files with a simple `bash` script. For example: + +```sh +#!/bin/bash +for i in {1..5} +do +cat << EOF >> generic_worker$i.yaml +worker_app: synapse.app.generic_worker +worker_name: generic_worker$i + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: 808$i + resources: + - names: [client, federation] + +worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml +EOF +done +``` + +This would create five generic workers with a unique `worker_name` field in each file and listening on ports 8081-8085. + +Customise the script to your needs. diff --git a/contrib/workers-bash-scripts/create-multiple-stream-writers.md b/contrib/workers-bash-scripts/create-multiple-stream-writers.md new file mode 100644 index 0000000000..0d2ca780a6 --- /dev/null +++ b/contrib/workers-bash-scripts/create-multiple-stream-writers.md @@ -0,0 +1,145 @@ +# Creating multiple stream writers with a bash script + +This script creates multiple [stream writer](https://github.com/matrix-org/synapse/blob/develop/docs/workers.md#stream-writers) workers. + +Stream writers require both replication and HTTP listeners. + +It also prints out the example lines for Synapse main configuration file. + +Remember to route necessary endpoints directly to a worker associated with it. + +If you run the script as-is, it will create workers with the replication listener starting from port 8034 and another, regular http listener starting from 8044. If you don't need all of the stream writers listed in the script, just remove them from the ```STREAM_WRITERS``` array. + +```sh +#!/bin/bash + +# Start with these replication and http ports. +# The script loop starts with the exact port and then increments it by one. +REP_START_PORT=8034 +HTTP_START_PORT=8044 + +# Stream writer workers to generate. Feel free to add or remove them as you wish. +# Event persister ("events") isn't included here as it does not require its +# own HTTP listener. + +STREAM_WRITERS+=( "presence" "typing" "receipts" "to_device" "account_data" ) + +NUM_WRITERS=$(expr ${#STREAM_WRITERS[@]}) + +i=0 + +while [ $i -lt "$NUM_WRITERS" ] +do +cat << EOF > ${STREAM_WRITERS[$i]}_stream_writer.yaml +worker_app: synapse.app.generic_worker +worker_name: ${STREAM_WRITERS[$i]}_stream_writer + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: $(expr $REP_START_PORT + $i) + resources: + - names: [replication] + + - type: http + port: $(expr $HTTP_START_PORT + $i) + resources: + - names: [client] + +worker_log_config: /etc/matrix-synapse/stream-writer-log.yaml +EOF +HOMESERVER_YAML_INSTANCE_MAP+=$" ${STREAM_WRITERS[$i]}_stream_writer: + host: 127.0.0.1 + port: $(expr $REP_START_PORT + $i) +" + +HOMESERVER_YAML_STREAM_WRITERS+=$" ${STREAM_WRITERS[$i]}: ${STREAM_WRITERS[$i]}_stream_writer +" + +((i++)) +done + +cat << EXAMPLECONFIG +# Add these lines to your homeserver.yaml. +# Don't forget to configure your reverse proxy and +# necessary endpoints to their respective worker. + +# See https://github.com/matrix-org/synapse/blob/develop/docs/workers.md +# for more information. + +# Remember: Under NO circumstances should the replication +# listener be exposed to the public internet; +# it has no authentication and is unencrypted. + +instance_map: +$HOMESERVER_YAML_INSTANCE_MAP +stream_writers: +$HOMESERVER_YAML_STREAM_WRITERS +EXAMPLECONFIG +``` + +Copy the code above save it to a file ```create_stream_writers.sh``` (for example). + +Make the script executable by running ```chmod +x create_stream_writers.sh```. + +## Run the script to create workers and print out a sample configuration + +Simply run the script to create YAML files in the current folder and print out the required configuration for ```homeserver.yaml```. + +```console +$ ./create_stream_writers.sh + +# Add these lines to your homeserver.yaml. +# Don't forget to configure your reverse proxy and +# necessary endpoints to their respective worker. + +# See https://github.com/matrix-org/synapse/blob/develop/docs/workers.md +# for more information + +# Remember: Under NO circumstances should the replication +# listener be exposed to the public internet; +# it has no authentication and is unencrypted. + +instance_map: + presence_stream_writer: + host: 127.0.0.1 + port: 8034 + typing_stream_writer: + host: 127.0.0.1 + port: 8035 + receipts_stream_writer: + host: 127.0.0.1 + port: 8036 + to_device_stream_writer: + host: 127.0.0.1 + port: 8037 + account_data_stream_writer: + host: 127.0.0.1 + port: 8038 + +stream_writers: + presence: presence_stream_writer + typing: typing_stream_writer + receipts: receipts_stream_writer + to_device: to_device_stream_writer + account_data: account_data_stream_writer +``` + +Simply copy-and-paste the output to an appropriate place in your Synapse main configuration file. + +## Write directly to Synapse configuration file + +You could also write the output directly to homeserver main configuration file. **This, however, is not recommended** as even a small typo (such as replacing >> with >) can erase the entire ```homeserver.yaml```. + +If you do this, back up your original configuration file first: + +```console +# Back up homeserver.yaml first +cp /etc/matrix-synapse/homeserver.yaml /etc/matrix-synapse/homeserver.yaml.bak + +# Create workers and write output to your homeserver.yaml +./create_stream_writers.sh >> /etc/matrix-synapse/homeserver.yaml +``` diff --git a/debian/changelog b/debian/changelog index 753a03065a..9417f8714f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,53 @@ +matrix-synapse-py3 (1.63.1) stable; urgency=medium + + * New Synapse release 1.63.1. + + -- Synapse Packaging team <packages@matrix.org> Wed, 20 Jul 2022 13:36:52 +0100 + +matrix-synapse-py3 (1.63.0) stable; urgency=medium + + * Clarify that homeserver server names are included in the data reported + by opt-in server stats reporting (`report_stats` homeserver config option). + * New Synapse release 1.63.0. + + -- Synapse Packaging team <packages@matrix.org> Tue, 19 Jul 2022 14:42:24 +0200 + +matrix-synapse-py3 (1.63.0~rc1) stable; urgency=medium + + * New Synapse release 1.63.0rc1. + + -- Synapse Packaging team <packages@matrix.org> Tue, 12 Jul 2022 11:26:02 +0100 + +matrix-synapse-py3 (1.62.0) stable; urgency=medium + + * New Synapse release 1.62.0. + + -- Synapse Packaging team <packages@matrix.org> Tue, 05 Jul 2022 11:14:15 +0100 + +matrix-synapse-py3 (1.62.0~rc3) stable; urgency=medium + + * New Synapse release 1.62.0rc3. + + -- Synapse Packaging team <packages@matrix.org> Mon, 04 Jul 2022 16:07:01 +0100 + +matrix-synapse-py3 (1.62.0~rc2) stable; urgency=medium + + * New Synapse release 1.62.0rc2. + + -- Synapse Packaging team <packages@matrix.org> Fri, 01 Jul 2022 11:42:41 +0100 + +matrix-synapse-py3 (1.62.0~rc1) stable; urgency=medium + + * New Synapse release 1.62.0rc1. + + -- Synapse Packaging team <packages@matrix.org> Tue, 28 Jun 2022 16:34:57 +0100 + +matrix-synapse-py3 (1.61.1) stable; urgency=medium + + * New Synapse release 1.61.1. + + -- Synapse Packaging team <packages@matrix.org> Tue, 28 Jun 2022 14:33:46 +0100 + matrix-synapse-py3 (1.61.0) stable; urgency=medium * New Synapse release 1.61.0. diff --git a/debian/matrix-synapse-py3.postinst b/debian/matrix-synapse-py3.postinst index a8dde1e082..029b9e0243 100644 --- a/debian/matrix-synapse-py3.postinst +++ b/debian/matrix-synapse-py3.postinst @@ -31,7 +31,7 @@ EOF # This file is autogenerated, and will be recreated on upgrade if it is deleted. # Any changes you make will be preserved. -# Whether to report anonymized homeserver usage statistics. +# Whether to report homeserver usage statistics. report_stats: false EOF fi diff --git a/debian/po/templates.pot b/debian/po/templates.pot index f0af9e70fb..445e4aac81 100644 --- a/debian/po/templates.pot +++ b/debian/po/templates.pot @@ -37,7 +37,7 @@ msgstr "" #. Type: boolean #. Description #: ../templates:2001 -msgid "Report anonymous statistics?" +msgid "Report homeserver usage statistics?" msgstr "" #. Type: boolean @@ -45,11 +45,11 @@ msgstr "" #: ../templates:2001 msgid "" "Developers of Matrix and Synapse really appreciate helping the project out " -"by reporting anonymized usage statistics from this homeserver. Only very " -"basic aggregate data (e.g. number of users) will be reported, but it helps " -"track the growth of the Matrix community, and helps in making Matrix a " -"success, as well as to convince other networks that they should peer with " -"Matrix." +"by reporting homeserver usage statistics from this homeserver. Your " +"homeserver's server name, along with very basic aggregate data (e.g. " +"number of users) will be reported. But it helps track the growth of the " +"Matrix community, and helps in making Matrix a success, as well as to " +"convince other networks that they should peer with Matrix." msgstr "" #. Type: boolean diff --git a/debian/templates b/debian/templates index 458fe8bbe9..23e24e1059 100644 --- a/debian/templates +++ b/debian/templates @@ -10,12 +10,13 @@ _Description: Name of the server: Template: matrix-synapse/report-stats Type: boolean Default: false -_Description: Report anonymous statistics? +_Description: Report homeserver usage statistics? Developers of Matrix and Synapse really appreciate helping the - project out by reporting anonymized usage statistics from this - homeserver. Only very basic aggregate data (e.g. number of users) - will be reported, but it helps track the growth of the Matrix - community, and helps in making Matrix a success, as well as to - convince other networks that they should peer with Matrix. + project out by reporting homeserver usage statistics from this + homeserver. Your homeserver's server name, along with very basic + aggregate data (e.g. number of users) will be reported. But it + helps track the growth of the Matrix community, and helps in + making Matrix a success, as well as to convince other networks + that they should peer with Matrix. . Thank you. diff --git a/docker/Dockerfile b/docker/Dockerfile index c676f83775..f4d8e6c925 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -45,7 +45,7 @@ RUN \ # We install poetry in its own build stage to avoid its dependencies conflicting with # synapse's dependencies. -# We use a specific commit from poetry's master branch instead of our usual 1.1.12, +# We use a specific commit from poetry's master branch instead of our usual 1.1.14, # to incorporate fixes to some bugs in `poetry export`. This commit corresponds to # https://github.com/python-poetry/poetry/pull/5156 and # https://github.com/python-poetry/poetry/issues/5141 ; @@ -62,7 +62,13 @@ WORKDIR /synapse # Copy just what we need to run `poetry export`... COPY pyproject.toml poetry.lock /synapse/ -RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt + +# If specified, we won't verify the hashes of dependencies. +# This is only needed if the hashes of dependencies cannot be checked for some +# reason, such as when a git repository is used directly as a dependency. +ARG TEST_ONLY_SKIP_DEP_HASH_VERIFICATION + +RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt ${TEST_ONLY_SKIP_DEP_HASH_VERIFICATION:+--without-hashes} ### ### Stage 1: builder @@ -85,6 +91,7 @@ RUN \ openssl \ rustc \ zlib1g-dev \ + git \ && rm -rf /var/lib/apt/lists/* # To speed up rebuilds, install all of the dependencies before we copy over diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 0f1570cfb6..84f836ff7b 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1 # Inherit from the official Synapse docker image ARG SYNAPSE_VERSION=latest FROM matrixdotorg/synapse:$SYNAPSE_VERSION diff --git a/docker/README-testing.md b/docker/README-testing.md index 1f0423f09b..21b99963d8 100644 --- a/docker/README-testing.md +++ b/docker/README-testing.md @@ -22,6 +22,10 @@ Consult the [contributing guide][guideComplementSh] for instructions on how to u Under some circumstances, you may wish to build the images manually. The instructions below will lead you to doing that. +Note that these images can only be built using [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/), +therefore BuildKit needs to be enabled when calling `docker build`. This can be done by +setting `DOCKER_BUILDKIT=1` in your environment. + Start by building the base Synapse docker image. If you wish to run tests with the latest release of Synapse, instead of your current checkout, you can skip this step. From the root of the repository: diff --git a/docker/README.md b/docker/README.md index 67c3bc65f0..5b7de2fe38 100644 --- a/docker/README.md +++ b/docker/README.md @@ -67,6 +67,13 @@ The following environment variables are supported in `generate` mode: * `UID`, `GID`: the user id and group id to use for creating the data directories. If unset, and no user is set via `docker run --user`, defaults to `991`, `991`. +* `SYNAPSE_LOG_LEVEL`: the log level to use (one of `DEBUG`, `INFO`, `WARNING` or `ERROR`). + Defaults to `INFO`. +* `SYNAPSE_LOG_SENSITIVE`: if set and the log level is set to `DEBUG`, Synapse + will log sensitive information such as access tokens. + This should not be needed unless you are a developer attempting to debug something + particularly tricky. + ## Postgres diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile index 8bec0f6116..3cfff19f9a 100644 --- a/docker/complement/Dockerfile +++ b/docker/complement/Dockerfile @@ -1,45 +1,62 @@ +# syntax=docker/dockerfile:1 # This dockerfile builds on top of 'docker/Dockerfile-workers' in matrix-org/synapse # by including a built-in postgres instance, as well as setting up the homeserver so # that it is ready for testing via Complement. # # Instructions for building this image from those it depends on is detailed in this guide: # https://github.com/matrix-org/synapse/blob/develop/docker/README-testing.md#testing-with-postgresql-and-single-or-multi-process-synapse -ARG SYNAPSE_VERSION=latest -FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION - -# Install postgresql -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -yqq postgresql-13 - -# Configure a user and create a database for Synapse -RUN pg_ctlcluster 13 main start && su postgres -c "echo \ - \"ALTER USER postgres PASSWORD 'somesecret'; \ - CREATE DATABASE synapse \ - ENCODING 'UTF8' \ - LC_COLLATE='C' \ - LC_CTYPE='C' \ - template=template0;\" | psql" && pg_ctlcluster 13 main stop -# Extend the shared homeserver config to disable rate-limiting, -# set Complement's static shared secret, enable registration, amongst other -# tweaks to get Synapse ready for testing. -# To do this, we copy the old template out of the way and then include it -# with Jinja2. -RUN mv /conf/shared.yaml.j2 /conf/shared-orig.yaml.j2 -COPY conf/workers-shared-extra.yaml.j2 /conf/shared.yaml.j2 - -WORKDIR /data +ARG SYNAPSE_VERSION=latest -COPY conf/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf +# first of all, we create a base image with a postgres server and database, +# which we can copy into the target image. For repeated rebuilds, this is +# much faster than apt installing postgres each time. +# +# This trick only works because (a) the Synapse image happens to have all the +# shared libraries that postgres wants, (b) we use a postgres image based on +# the same debian version as Synapse's docker image (so the versions of the +# shared libraries match). -# Copy the entrypoint -COPY conf/start_for_complement.sh / +FROM postgres:13-bullseye AS postgres_base + # initialise the database cluster in /var/lib/postgresql + RUN gosu postgres initdb --locale=C --encoding=UTF-8 --auth-host password -# Expose nginx's listener ports -EXPOSE 8008 8448 + # Configure a password and create a database for Synapse + RUN echo "ALTER USER postgres PASSWORD 'somesecret'" | gosu postgres postgres --single + RUN echo "CREATE DATABASE synapse" | gosu postgres postgres --single -ENTRYPOINT ["/start_for_complement.sh"] +# now build the final image, based on the Synapse image. -# Update the healthcheck to have a shorter check interval -HEALTHCHECK --start-period=5s --interval=1s --timeout=1s \ - CMD /bin/sh /healthcheck.sh +FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION + # copy the postgres installation over from the image we built above + RUN adduser --system --uid 999 postgres --home /var/lib/postgresql + COPY --from=postgres_base /var/lib/postgresql /var/lib/postgresql + COPY --from=postgres_base /usr/lib/postgresql /usr/lib/postgresql + COPY --from=postgres_base /usr/share/postgresql /usr/share/postgresql + RUN mkdir /var/run/postgresql && chown postgres /var/run/postgresql + ENV PATH="${PATH}:/usr/lib/postgresql/13/bin" + ENV PGDATA=/var/lib/postgresql/data + + # Extend the shared homeserver config to disable rate-limiting, + # set Complement's static shared secret, enable registration, amongst other + # tweaks to get Synapse ready for testing. + # To do this, we copy the old template out of the way and then include it + # with Jinja2. + RUN mv /conf/shared.yaml.j2 /conf/shared-orig.yaml.j2 + COPY conf/workers-shared-extra.yaml.j2 /conf/shared.yaml.j2 + + WORKDIR /data + + COPY conf/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf + + # Copy the entrypoint + COPY conf/start_for_complement.sh / + + # Expose nginx's listener ports + EXPOSE 8008 8448 + + ENTRYPOINT ["/start_for_complement.sh"] + + # Update the healthcheck to have a shorter check interval + HEALTHCHECK --start-period=5s --interval=1s --timeout=1s \ + CMD /bin/sh /healthcheck.sh diff --git a/docker/complement/README.md b/docker/complement/README.md index 37c39e2dfc..62682219e8 100644 --- a/docker/complement/README.md +++ b/docker/complement/README.md @@ -7,7 +7,7 @@ so **please don't use this image for a production server**. This multi-purpose image is built on top of `Dockerfile-workers` in the parent directory and can be switched using environment variables between the following configurations: -- Monolithic Synapse with SQLite (`SYNAPSE_COMPLEMENT_DATABASE=sqlite`) +- Monolithic Synapse with SQLite (default, or `SYNAPSE_COMPLEMENT_DATABASE=sqlite`) - Monolithic Synapse with Postgres (`SYNAPSE_COMPLEMENT_DATABASE=postgres`) - Workerised Synapse with Postgres (`SYNAPSE_COMPLEMENT_DATABASE=postgres` and `SYNAPSE_COMPLEMENT_USE_WORKERS=true`) diff --git a/docker/complement/conf/postgres.supervisord.conf b/docker/complement/conf/postgres.supervisord.conf index 5dae3e6330..b88bfc772e 100644 --- a/docker/complement/conf/postgres.supervisord.conf +++ b/docker/complement/conf/postgres.supervisord.conf @@ -1,5 +1,5 @@ [program:postgres] -command=/usr/local/bin/prefix-log /usr/bin/pg_ctlcluster 13 main start --foreground +command=/usr/local/bin/prefix-log gosu postgres postgres # Only start if START_POSTGRES=1 autostart=%(ENV_START_POSTGRES)s diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index b9c97ab687..cc6482f763 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -31,7 +31,7 @@ case "$SYNAPSE_COMPLEMENT_DATABASE" in export START_POSTGRES=true ;; - sqlite) + sqlite|"") # Configure supervisord not to start Postgres, as we don't need it export START_POSTGRES=false ;; @@ -59,6 +59,9 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then synchrotron, \ appservice, \ pusher" + + # Improve startup times by using a launcher based on fork() + export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1 else # Empty string here means 'main process only' export SYNAPSE_WORKER_TYPES="" @@ -73,14 +76,30 @@ fi # Generate a TLS key, then generate a certificate by having Complement's CA sign it # Note that both the key and certificate are in PEM format (not DER). + +# First generate a configuration file to set up a Subject Alternative Name. +cat > /conf/server.tls.conf <<EOF +.include /etc/ssl/openssl.cnf + +[SAN] +subjectAltName=DNS:${SERVER_NAME} +EOF + +# Generate an RSA key openssl genrsa -out /conf/server.tls.key 2048 -openssl req -new -key /conf/server.tls.key -out /conf/server.tls.csr \ - -subj "/CN=${SERVER_NAME}" +# Generate a certificate signing request +openssl req -new -config /conf/server.tls.conf -key /conf/server.tls.key -out /conf/server.tls.csr \ + -subj "/CN=${SERVER_NAME}" -reqexts SAN +# Make the Complement Certificate Authority sign and generate a certificate. openssl x509 -req -in /conf/server.tls.csr \ -CA /complement/ca/ca.crt -CAkey /complement/ca/ca.key -set_serial 1 \ - -out /conf/server.tls.crt + -out /conf/server.tls.crt -extfile /conf/server.tls.conf -extensions SAN + +# Assert that we have a Subject Alternative Name in the certificate. +# (grep will exit with 1 here if there isn't a SAN in the certificate.) +openssl x509 -in /conf/server.tls.crt -noout -text | grep DNS: export SYNAPSE_TLS_CERT=/conf/server.tls.crt export SYNAPSE_TLS_KEY=/conf/server.tls.key diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index a5b1b6bb8b..9e554a865e 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -67,6 +67,10 @@ rc_joins: per_second: 9999 burst_count: 9999 +rc_joins_per_room: + per_second: 9999 + burst_count: 9999 + rc_3pid_validation: per_second: 1000 burst_count: 1000 @@ -81,6 +85,8 @@ rc_invites: federation_rr_transactions_per_room_per_second: 9999 +allow_device_name_lookup_over_federation: true + ## Experimental Features ## experimental_features: @@ -103,4 +109,10 @@ server_notices: system_mxid_avatar_url: "" room_name: "Server Alert" + +# Disable sync cache so that initial `/sync` requests are up-to-date. +caches: + sync_response_cache_duration: 0 + + {% include "shared-orig.yaml.j2" %} diff --git a/docker/conf-workers/supervisord.conf.j2 b/docker/conf-workers/supervisord.conf.j2 index 7afab05133..086137494e 100644 --- a/docker/conf-workers/supervisord.conf.j2 +++ b/docker/conf-workers/supervisord.conf.j2 @@ -31,17 +31,3 @@ autorestart=true # Redis can be disabled if the image is being used without workers autostart={{ enable_redis }} -[program:synapse_main] -command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver --config-path="{{ main_config_path }}" --config-path=/conf/workers/shared.yaml -priority=10 -# Log startup failures to supervisord's stdout/err -# Regular synapse logs will still go in the configured data directory -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 -autorestart=unexpected -exitcodes=0 - -# Additional process blocks -{{ worker_config }} diff --git a/docker/conf-workers/synapse.supervisord.conf.j2 b/docker/conf-workers/synapse.supervisord.conf.j2 new file mode 100644 index 0000000000..481eb4fc92 --- /dev/null +++ b/docker/conf-workers/synapse.supervisord.conf.j2 @@ -0,0 +1,52 @@ +{% if use_forking_launcher %} +[program:synapse_fork] +command=/usr/local/bin/python -m synapse.app.complement_fork_starter + {{ main_config_path }} + synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + {%- for worker in workers %} + -- {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml + {%- endfor %} +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + +{% else %} +[program:synapse_main] +command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml +priority=10 +# Log startup failures to supervisord's stdout/err +# Regular synapse logs will still go in the configured data directory +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + + + {% for worker in workers %} +[program:synapse_{{ worker.name }}] +command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml +autorestart=unexpected +priority=500 +exitcodes=0 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + + {% endfor %} +{% endif %} diff --git a/docker/conf/log.config b/docker/conf/log.config index dc8c70befd..90b5179838 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -2,7 +2,11 @@ version: 1 formatters: precise: + {% if include_worker_name_in_log_line %} + format: '{{ worker_name }} | %(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% else %} format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% endif %} handlers: {% if LOG_FILE_PATH %} @@ -45,11 +49,17 @@ handlers: class: logging.StreamHandler formatter: precise +{% if not SYNAPSE_LOG_SENSITIVE %} +{# + If SYNAPSE_LOG_SENSITIVE is unset, then override synapse.storage.SQL to INFO + so that DEBUG entries (containing sensitive information) are not emitted. +#} loggers: synapse.storage.SQL: # beware: increasing this to DEBUG will make synapse log sensitive # information such as access tokens. level: INFO +{% endif %} root: level: {{ SYNAPSE_LOG_LEVEL or "INFO" }} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 2a2c13f77a..51583dc13d 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -26,6 +26,13 @@ # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. +# * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, +# only intended for usage in Complement at the moment. +# No stability guarantees are provided. +# * SYNAPSE_LOG_LEVEL: Set this to DEBUG, INFO, WARNING or ERROR to change the +# log level. INFO is the default. +# * SYNAPSE_LOG_SENSITIVE: If unset, SQL and SQL values won't be logged, +# regardless of the SYNAPSE_LOG_LEVEL setting. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -35,7 +42,7 @@ import os import subprocess import sys from pathlib import Path -from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Set +from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Optional, Set import yaml from jinja2 import Environment, FileSystemLoader @@ -176,21 +183,6 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { } # Templates for sections that may be inserted multiple times in config files -SUPERVISORD_PROCESS_CONFIG_BLOCK = """ -[program:synapse_{name}] -command=/usr/local/bin/prefix-log /usr/local/bin/python -m {app} \ - --config-path="{config_path}" \ - --config-path=/conf/workers/shared.yaml \ - --config-path=/conf/workers/{name}.yaml -autorestart=unexpected -priority=500 -exitcodes=0 -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 -""" - NGINX_LOCATION_CONFIG_BLOCK = """ location ~* {endpoint} {{ proxy_pass {upstream}; @@ -353,13 +345,10 @@ def generate_worker_files( # This config file will be passed to all workers, included Synapse's main process. shared_config: Dict[str, Any] = {"listeners": listeners} - # The supervisord config. The contents of which will be inserted into the - # base supervisord jinja2 template. - # - # Supervisord will be in charge of running everything, from redis to nginx to Synapse - # and all of its worker processes. Load the config template, which defines a few - # services that are necessary to run. - supervisord_config = "" + # List of dicts that describe workers. + # We pass this to the Supervisor template later to generate the appropriate + # program blocks. + worker_descriptors: List[Dict[str, Any]] = [] # Upstreams for load-balancing purposes. This dict takes the form of a worker type to the # ports of each worker. For example: @@ -437,7 +426,7 @@ def generate_worker_files( ) # Enable the worker in supervisord - supervisord_config += SUPERVISORD_PROCESS_CONFIG_BLOCK.format_map(worker_config) + worker_descriptors.append(worker_config) # Add nginx location blocks for this worker's endpoints (if any are defined) for pattern in worker_config["endpoint_patterns"]: @@ -535,10 +524,17 @@ def generate_worker_files( "/conf/supervisord.conf.j2", "/etc/supervisor/supervisord.conf", main_config_path=config_path, - worker_config=supervisord_config, enable_redis=workers_in_use, ) + convert( + "/conf/synapse.supervisord.conf.j2", + "/etc/supervisor/conf.d/synapse.conf", + workers=worker_descriptors, + main_config_path=config_path, + use_forking_launcher=environ.get("SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"), + ) + # healthcheck config convert( "/conf/healthcheck.sh.j2", @@ -560,18 +556,25 @@ def generate_worker_log_config( Returns: the path to the generated file """ # Check whether we should write worker logs to disk, in addition to the console - extra_log_template_args = {} + extra_log_template_args: Dict[str, Optional[str]] = {} if environ.get("SYNAPSE_WORKERS_WRITE_LOGS_TO_DISK"): - extra_log_template_args["LOG_FILE_PATH"] = "{dir}/logs/{name}.log".format( - dir=data_dir, name=worker_name - ) + extra_log_template_args["LOG_FILE_PATH"] = f"{data_dir}/logs/{worker_name}.log" + + extra_log_template_args["SYNAPSE_LOG_LEVEL"] = environ.get("SYNAPSE_LOG_LEVEL") + extra_log_template_args["SYNAPSE_LOG_SENSITIVE"] = environ.get( + "SYNAPSE_LOG_SENSITIVE" + ) + # Render and write the file - log_config_filepath = "/conf/workers/{name}.log.config".format(name=worker_name) + log_config_filepath = f"/conf/workers/{worker_name}.log.config" convert( "/conf/log.config", log_config_filepath, worker_name=worker_name, **extra_log_template_args, + include_worker_name_in_log_line=environ.get( + "SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER" + ), ) return log_config_filepath diff --git a/docker/start.py b/docker/start.py index 4ac8f03477..5a98dce551 100755 --- a/docker/start.py +++ b/docker/start.py @@ -110,7 +110,11 @@ def generate_config_from_template( log_config_file = environ["SYNAPSE_LOG_CONFIG"] log("Generating log config file " + log_config_file) - convert("/conf/log.config", log_config_file, environ) + convert( + "/conf/log.config", + log_config_file, + {**environ, "include_worker_name_in_log_line": False}, + ) # Hopefully we already have a signing key, but generate one if not. args = [ diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d7cf2df112..2d56b084e2 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -35,7 +35,6 @@ - [Application Services](application_services.md) - [Server Notices](server_notices.md) - [Consent Tracking](consent_tracking.md) - - [URL Previews](development/url_previews.md) - [User Directory](user_directory.md) - [Message Retention Policies](message_retention_policies.md) - [Pluggable Modules](modules/index.md) @@ -55,7 +54,6 @@ - [Admin API](usage/administration/admin_api/README.md) - [Account Validity](admin_api/account_validity.md) - [Background Updates](usage/administration/admin_api/background_updates.md) - - [Delete Group](admin_api/delete_group.md) - [Event Reports](admin_api/event_reports.md) - [Media](admin_api/media_admin_api.md) - [Purge History](admin_api/purge_history_api.md) @@ -70,6 +68,7 @@ - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) + - [Reporting Homeserver Usage Statistics](usage/administration/monitoring/reporting_homeserver_usage_statistics.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) @@ -81,6 +80,7 @@ # Development - [Contributing Guide](development/contributing_guide.md) - [Code Style](code_style.md) + - [Reviewing Code](development/reviews.md) - [Release Cycle](development/releases.md) - [Git Usage](development/git.md) - [Testing]() diff --git a/docs/admin_api/delete_group.md b/docs/admin_api/delete_group.md deleted file mode 100644 index 73a96842ac..0000000000 --- a/docs/admin_api/delete_group.md +++ /dev/null @@ -1,14 +0,0 @@ -# Delete a local group - -This API lets a server admin delete a local group. Doing so will kick all -users out of the group so that their clients will correctly handle the group -being deleted. - -To use it, you will need to authenticate by providing an `access_token` -for a server admin: see [Admin API](../usage/administration/admin_api). - -The API is: - -``` -POST /_synapse/admin/v1/delete_group/<group_id> -``` diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index d4873f9490..9aa489e4a3 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -59,6 +59,7 @@ The following fields are possible in the JSON response body: - `guest_access` - Whether guests can join the room. One of: ["can_join", "forbidden"]. - `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"]. - `state_events` - Total number of state_events of a room. Complexity of the room. + - `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. If the room does not define a type, the value will be `null`. * `offset` - The current pagination offset in rooms. This parameter should be used instead of `next_token` for room offset as `next_token` is not intended to be parsed. @@ -101,7 +102,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" }, ... (8 hidden items) ... { @@ -118,7 +120,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": null } ], "offset": 0, @@ -151,7 +154,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8 + "state_events": 8, + "room_type": null } ], "offset": 0, @@ -184,7 +188,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": null }, ... (98 hidden items) ... { @@ -201,7 +206,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": "m.space" } ], "offset": 0, @@ -238,7 +244,9 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" + }, ... (48 hidden items) ... { @@ -255,7 +263,9 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": null + } ], "offset": 100, @@ -290,6 +300,8 @@ The following fields are possible in the JSON response body: * `guest_access` - Whether guests can join the room. One of: ["can_join", "forbidden"]. * `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"]. * `state_events` - Total number of state_events of a room. Complexity of the room. +* `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. + If the room does not define a type, the value will be `null`. The API is: @@ -317,7 +329,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" } ``` diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 62f89e8cba..0871cfebf5 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -124,9 +124,8 @@ Body parameters: - `address` - string. Value of third-party ID. belonging to a user. - `external_ids` - array, optional. Allow setting the identifier of the external identity - provider for SSO (Single sign-on). Details in - [Sample Configuration File](../usage/configuration/homeserver_sample_config.html) - section `sso` and `oidc_providers`. + provider for SSO (Single sign-on). Details in the configuration manual under the + sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers). - `auth_provider` - string. ID of the external identity provider. Value of `idp_id` in the homeserver configuration. Note that no error is raised if the provided value is not in the homeserver configuration. @@ -545,7 +544,7 @@ Gets a list of all local media that a specific `user_id` has created. These are media that the user has uploaded themselves ([local media](../media_repository.md#local-media)), as well as [URL preview images](../media_repository.md#url-previews) requested by the user if the -[feature is enabled](../development/url_previews.md). +[feature is enabled](../usage/configuration/config_documentation.md#url_preview_enabled). By default, the response is ordered by descending creation date and ascending media ID. The newest media is on top. You can change the order with parameters diff --git a/docs/code_style.md b/docs/code_style.md index db7edcd76b..d65fda62d1 100644 --- a/docs/code_style.md +++ b/docs/code_style.md @@ -70,82 +70,61 @@ on save as they take a while and can be very resource intensive. - Avoid wildcard imports (`from synapse.types import *`) and relative imports (`from .types import UserID`). -## Configuration file format +## Configuration code and documentation format -The [sample configuration file](./sample_config.yaml) acts as a +When adding a configuration option to the code, if several settings are grouped into a single dict, ensure that your code +correctly handles the top-level option being set to `None` (as it will be if no sub-options are enabled). + +The [configuration manual](usage/configuration/config_documentation.md) acts as a reference to Synapse's configuration options for server administrators. Remember that many readers will be unfamiliar with YAML and server -administration in general, so that it is important that the file be as -easy to understand as possible, which includes following a consistent -format. +administration in general, so it is important that when you add +a configuration option the documentation be as easy to understand as possible, which +includes following a consistent format. Some guidelines follow: -- Sections should be separated with a heading consisting of a single - line prefixed and suffixed with `##`. There should be **two** blank - lines before the section header, and **one** after. -- Each option should be listed in the file with the following format: - - A comment describing the setting. Each line of this comment - should be prefixed with a hash (`#`) and a space. +- Each option should be listed in the config manual with the following format: + + - The name of the option, prefixed by `###`. - The comment should describe the default behaviour (ie, what + - A comment which describes the default behaviour (i.e. what happens if the setting is omitted), as well as what the effect will be if the setting is changed. - - Often, the comment end with something like "uncomment the - following to <do action>". - - - A line consisting of only `#`. - - A commented-out example setting, prefixed with only `#`. + - An example setting, using backticks to define the code block For boolean (on/off) options, convention is that this example - should be the *opposite* to the default (so the comment will end - with "Uncomment the following to enable [or disable] - <feature>." For other options, the example should give some - non-default value which is likely to be useful to the reader. - -- There should be a blank line between each option. -- Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in - comment lines starting `# #`, as this is hard to read and confusing - to edit. Instead, leave the top-level config option uncommented, and - follow the conventions above for sub-options. Ensure that your code - correctly handles the top-level option being set to `None` (as it - will be if no sub-options are enabled). -- Lines should be wrapped at 80 characters. -- Use two-space indents. -- `true` and `false` are spelt thus (as opposed to `True`, etc.) -- Use single quotes (`'`) rather than double-quotes (`"`) or backticks - (`` ` ``) to refer to configuration options. + should be the *opposite* to the default. For other options, the example should give + some non-default value which is likely to be useful to the reader. + +- There should be a horizontal rule between each option, which can be achieved by adding `---` before and + after the option. +- `true` and `false` are spelt thus (as opposed to `True`, etc.) Example: +--- +### `modules` + +Use the `module` sub-option to add a module under `modules` to extend functionality. +The `module` setting then has a sub-option, `config`, which can be used to define some configuration +for the `module`. + +Defaults to none. + +Example configuration: ```yaml -## Frobnication ## - -# The frobnicator will ensure that all requests are fully frobnicated. -# To enable it, uncomment the following. -# -#frobnicator_enabled: true - -# By default, the frobnicator will frobnicate with the default frobber. -# The following will make it use an alternative frobber. -# -#frobincator_frobber: special_frobber - -# Settings for the frobber -# -frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} ``` +--- Note that the sample configuration is generated from the synapse code and is maintained by a script, `scripts-dev/generate_sample_config.sh`. Making sure that the output from this script matches the desired format is left as an exercise for the reader! + diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index c2f04a3905..ab320cbd78 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -309,6 +309,24 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data - Passing `POSTGRES=1` as an environment variable to use the Postgres database instead. - Passing `WORKERS=1` as an environment variable to use a workerised setup instead. This option implies the use of Postgres. +To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g: +```sh +SYNAPSE_TEST_LOG_LEVEL=DEBUG COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -run TestImportHistoricalMessages +``` + +### Prettier formatting with `gotestfmt` + +If you want to format the output of the tests the same way as it looks in CI, +install [gotestfmt](https://github.com/haveyoudebuggedit/gotestfmt). + +You can then use this incantation to format the tests appropriately: + +```sh +COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -json | gotestfmt -hide successful-tests +``` + +(Remove `-hide successful-tests` if you don't want to hide successful tests.) + ### Access database for homeserver after Complement test runs. @@ -333,7 +351,7 @@ To prepare a Pull Request, please: 3. `git push` your commit to your fork of Synapse; 4. on GitHub, [create the Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request); 5. add a [changelog entry](#changelog) and push it to your Pull Request; -6. for most contributors, that's all - however, if you are a member of the organization `matrix-org`, on GitHub, please request a review from `matrix.org / Synapse Core`. +6. that's it for now, a non-draft pull request will automatically request review from the team; 7. if you need to update your PR, please avoid rebasing and just add new commits to your branch. @@ -509,10 +527,13 @@ From this point, you should: 1. Look at the results of the CI pipeline. - If there is any error, fix the error. 2. If a developer has requested changes, make these changes and let us know if it is ready for a developer to review again. + - A pull request is a conversation, if you disagree with the suggestions, please respond and discuss it. 3. Create a new commit with the changes. - Please do NOT overwrite the history. New commits make the reviewer's life easier. - Push this commits to your Pull Request. 4. Back to 1. +5. Once the pull request is ready for review again please re-request review from whichever developer did your initial + review (or leave a comment in the pull request that you believe all required changes have been done). Once both the CI and the developers are happy, the patch will be merged into Synapse and released shortly! diff --git a/docs/development/dependencies.md b/docs/development/dependencies.md index 8ef7d357d8..236856a6b0 100644 --- a/docs/development/dependencies.md +++ b/docs/development/dependencies.md @@ -237,3 +237,28 @@ poetry run pip install build && poetry run python -m build because [`build`](https://github.com/pypa/build) is a standardish tool which doesn't require poetry. (It's what we use in CI too). However, you could try `poetry build` too. + + +# Troubleshooting + +## Check the version of poetry with `poetry --version`. + +At the time of writing, the 1.2 series is beta only. We have seen some examples +where the lockfiles generated by 1.2 prereleasese aren't interpreted correctly +by poetry 1.1.x. For now, use poetry 1.1.14, which includes a critical +[change](https://github.com/python-poetry/poetry/pull/5973) needed to remain +[compatible with PyPI](https://github.com/pypi/warehouse/pull/11775). + +It can also be useful to check the version of `poetry-core` in use. If you've +installed `poetry` with `pipx`, try `pipx runpip poetry list | grep poetry-core`. + +## Clear caches: `poetry cache clear --all pypi`. + +Poetry caches a bunch of information about packages that isn't readily available +from PyPI. (This is what makes poetry seem slow when doing the first +`poetry install`.) Try `poetry cache list` and `poetry cache clear --all +<name of cache>` to see if that fixes things. + +## Try `--verbose` or `--dry-run` arguments. + +Sometimes useful to see what poetry's internal logic is. diff --git a/docs/development/reviews.md b/docs/development/reviews.md new file mode 100644 index 0000000000..d0379949cb --- /dev/null +++ b/docs/development/reviews.md @@ -0,0 +1,41 @@ +Some notes on how we do reviews +=============================== + +The Synapse team works off a shared review queue -- any new pull requests for +Synapse (or related projects) has a review requested from the entire team. Team +members should process this queue using the following rules: + +* Any high urgency pull requests (e.g. fixes for broken continuous integration + or fixes for release blockers); +* Follow-up reviews for pull requests which have previously received reviews; +* Any remaining pull requests. + +For the latter two categories above, older pull requests should be prioritised. + +It is explicit that there is no priority given to pull requests from the team +(vs from the community). If a pull request requires a quick turn around, please +explicitly communicate this via [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) +or as a comment on the pull request. + +Once an initial review has been completed and the author has made additional changes, +follow-up reviews should go back to the same reviewer. This helps build a shared +context and conversation between author and reviewer. + +As a team we aim to keep the number of inflight pull requests to a minimum to ensure +that ongoing work is finished before starting new work. + +Performing a review +------------------- + +To communicate to the rest of the team the status of each pull request, team +members should do the following: + +* Assign themselves to the pull request (they should be left assigned to the + pull request until it is merged, closed, or are no longer the reviewer); +* Review the pull request by leaving comments, questions, and suggestions; +* Mark the pull request appropriately (as needing changes or accepted). + +If you are unsure about a particular part of the pull request (or are not confident +in your understanding of part of the code) then ask questions or request review +from the team again. When requesting review from the team be sure to leave a comment +with the rationale on why you're putting it back in the queue. diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md deleted file mode 100644 index 154b9a5e12..0000000000 --- a/docs/development/url_previews.md +++ /dev/null @@ -1,61 +0,0 @@ -URL Previews -============ - -The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API -for URLs which outputs [Open Graph](https://ogp.me/) responses (with some Matrix -specific additions). - -This does have trade-offs compared to other designs: - -* Pros: - * Simple and flexible; can be used by any clients at any point -* Cons: - * If each homeserver provides one of these independently, all the HSes in a - room may needlessly DoS the target URI - * The URL metadata must be stored somewhere, rather than just using Matrix - itself to store the media. - * Matrix cannot be used to distribute the metadata between homeservers. - -When Synapse is asked to preview a URL it does the following: - -1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the - config). -2. Checks the in-memory cache by URLs and returns the result if it exists. (This - is also used to de-duplicate processing of multiple in-flight requests at once.) -3. Kicks off a background process to generate a preview: - 1. Checks the database cache by URL and timestamp and returns the result if it - has not expired and was successful (a 2xx return code). - 2. Checks if the URL matches an [oEmbed](https://oembed.com/) pattern. If it - does, update the URL to download. - 3. Downloads the URL and stores it into a file via the media storage provider - and saves the local media metadata. - 4. If the media is an image: - 1. Generates thumbnails. - 2. Generates an Open Graph response based on image properties. - 5. If the media is HTML: - 1. Decodes the HTML via the stored file. - 2. Generates an Open Graph response from the HTML. - 3. If a JSON oEmbed URL was found in the HTML via autodiscovery: - 1. Downloads the URL and stores it into a file via the media storage provider - and saves the local media metadata. - 2. Convert the oEmbed response to an Open Graph response. - 3. Override any Open Graph data from the HTML with data from oEmbed. - 4. If an image exists in the Open Graph response: - 1. Downloads the URL and stores it into a file via the media storage - provider and saves the local media metadata. - 2. Generates thumbnails. - 3. Updates the Open Graph response based on image properties. - 6. If the media is JSON and an oEmbed URL was found: - 1. Convert the oEmbed response to an Open Graph response. - 2. If a thumbnail or image is in the oEmbed response: - 1. Downloads the URL and stores it into a file via the media storage - provider and saves the local media metadata. - 2. Generates thumbnails. - 3. Updates the Open Graph response based on image properties. - 7. Stores the result in the database cache. -4. Returns the result. - -The in-memory cache expires after 1 hour. - -Expired entries in the database cache (and their associated media files) are -deleted every 10 seconds. The default expiration time is 1 hour from download. diff --git a/docs/jwt.md b/docs/jwt.md index 346daf78ad..2e262583a7 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -37,27 +37,26 @@ As with other login types, there are additional fields (e.g. `device_id` and ## Preparing Synapse The JSON Web Token integration in Synapse uses the -[`PyJWT`](https://pypi.org/project/pyjwt/) library, which must be installed +[`Authlib`](https://docs.authlib.org/en/latest/index.html) library, which must be installed as follows: - * The relevant libraries are included in the Docker images and Debian packages - provided by `matrix.org` so no further action is needed. +* The relevant libraries are included in the Docker images and Debian packages + provided by `matrix.org` so no further action is needed. - * If you installed Synapse into a virtualenv, run `/path/to/env/bin/pip - install synapse[pyjwt]` to install the necessary dependencies. +* If you installed Synapse into a virtualenv, run `/path/to/env/bin/pip + install synapse[jwt]` to install the necessary dependencies. - * For other installation mechanisms, see the documentation provided by the - maintainer. +* For other installation mechanisms, see the documentation provided by the + maintainer. -To enable the JSON web token integration, you should then add an `jwt_config` section -to your configuration file (or uncomment the `enabled: true` line in the -existing section). See [sample_config.yaml](./sample_config.yaml) for some +To enable the JSON web token integration, you should then add a `jwt_config` option +to your configuration file. See the [configuration manual](usage/configuration/config_documentation.md#jwt_config) for some sample settings. ## How to test JWT as a developer Although JSON Web Tokens are typically generated from an external server, the -examples below use [PyJWT](https://pyjwt.readthedocs.io/en/latest/) directly. +example below uses a locally generated JWT. 1. Configure Synapse with JWT logins, note that this example uses a pre-shared secret and an algorithm of HS256: @@ -70,10 +69,21 @@ examples below use [PyJWT](https://pyjwt.readthedocs.io/en/latest/) directly. ``` 2. Generate a JSON web token: - ```bash - $ pyjwt --key=my-secret-token --alg=HS256 encode sub=test-user - eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0ZXN0LXVzZXIifQ.Ag71GT8v01UO3w80aqRPTeuVPBIBZkYhNTJJ-_-zQIc + You can use the following short Python snippet to generate a JWT + protected by an HMAC. + Take care that the `secret` and the algorithm given in the `header` match + the entries from `jwt_config` above. + + ```python + from authlib.jose import jwt + + header = {"alg": "HS256"} + payload = {"sub": "user1", "aud": ["audience"]} + secret = "my-secret-token" + result = jwt.encode(header, payload, secret) + print(result.decode("ascii")) ``` + 3. Query for the login types and ensure `org.matrix.login.jwt` is there: ```bash diff --git a/docs/manhole.md b/docs/manhole.md index a82fad0f0f..4e5bf833ce 100644 --- a/docs/manhole.md +++ b/docs/manhole.md @@ -13,8 +13,10 @@ environments where untrusted users have shell access. ## Configuring the manhole -To enable it, first uncomment the `manhole` listener configuration in -`homeserver.yaml`. The configuration is slightly different if you're using docker. +To enable it, first add the `manhole` listener configuration in your +`homeserver.yaml`. You can find information on how to do that +in the [configuration manual](usage/configuration/config_documentation.md#manhole_settings). +The configuration is slightly different if you're using docker. #### Docker config diff --git a/docs/media_repository.md b/docs/media_repository.md index ba17f8a856..23e6da7f31 100644 --- a/docs/media_repository.md +++ b/docs/media_repository.md @@ -7,8 +7,7 @@ The media repository users. * caches avatars, attachments and their thumbnails for media uploaded by remote users. - * caches resources and thumbnails used for - [URL previews](development/url_previews.md). + * caches resources and thumbnails used for URL previews. All media in Matrix can be identified by a unique [MXC URI](https://spec.matrix.org/latest/client-server-api/#matrix-content-mxc-uris), @@ -59,8 +58,6 @@ remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg Note that `remote_thumbnail/` does not have an `s`. ## URL Previews -See [URL Previews](development/url_previews.md) for documentation on the URL preview -process. When generating previews for URLs, Synapse may download and cache various resources, including images. These resources are assigned temporary media IDs diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index b52c4aaa24..8c88f93935 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -49,9 +49,9 @@ clients. ## Server configuration -Support for this feature can be enabled and configured in the -`retention` section of the Synapse configuration file (see the -[sample file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518)). +Support for this feature can be enabled and configured by adding a the +`retention` in the Synapse configuration file (see +[configuration manual](usage/configuration/config_documentation.md#retention)). To enable support for message retention policies, set the setting `enabled` in this section to `true`. @@ -65,8 +65,8 @@ message retention policy configured in its state. This allows server admins to ensure that messages are never kept indefinitely in a server's database. -A default policy can be defined as such, in the `retention` section of -the configuration file: +A default policy can be defined as such, by adding the `retention` option in +the configuration file and adding these sub-options: ```yaml default_policy: @@ -86,8 +86,8 @@ Purge jobs are the jobs that Synapse runs in the background to purge expired events from the database. They are only run if support for message retention policies is enabled in the server's configuration. If no configuration for purge jobs is configured by the server admin, -Synapse will use a default configuration, which is described in the -[sample configuration file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518). +Synapse will use a default configuration, which is described here in the +[configuration manual](usage/configuration/config_documentation.md#retention). Some server admins might want a finer control on when events are removed depending on an event's room's policy. This can be done by setting the @@ -137,8 +137,8 @@ the server's database. ### Lifetime limits Server admins can set limits on the values of `max_lifetime` to use when -purging old events in a room. These limits can be defined as such in the -`retention` section of the configuration file: +purging old events in a room. These limits can be defined under the +`retention` option in the configuration file: ```yaml allowed_lifetime_min: 1d diff --git a/docs/openid.md b/docs/openid.md index 9d615a5737..d0ccf36f71 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -45,8 +45,8 @@ as follows: maintainer. To enable the OpenID integration, you should then add a section to the `oidc_providers` -setting in your configuration file (or uncomment one of the existing examples). -See [sample_config.yaml](./sample_config.yaml) for some sample settings, as well as +setting in your configuration file. +See the [configuration manual](usage/configuration/config_documentation.md#oidc_providers) for some sample settings, as well as the text below for example configurations for specific providers. ## Sample configs diff --git a/docs/opentracing.md b/docs/opentracing.md index f91362f112..abb94b565f 100644 --- a/docs/opentracing.md +++ b/docs/opentracing.md @@ -57,8 +57,9 @@ https://www.jaegertracing.io/docs/latest/getting-started. ## Enable OpenTracing in Synapse OpenTracing is not enabled by default. It must be enabled in the -homeserver config by uncommenting the config options under `opentracing` -as shown in the [sample config](./sample_config.yaml). For example: +homeserver config by adding the `opentracing` option to your config file. You can find +documentation about how to do this in the [config manual under the header 'Opentracing'](usage/configuration/config_documentation.md#opentracing). +See below for an example Opentracing configuration: ```yaml opentracing: diff --git a/docs/postgres.md b/docs/postgres.md index cbc32e1836..f2519f6b0a 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -143,6 +143,14 @@ to do step 2. It is safe to at any time kill the port script and restart it. +However, under no circumstances should the SQLite database be `VACUUM`ed between +multiple runs of the script. Doing so can lead to an inconsistent copy of your database +into Postgres. +To avoid accidental error, the script will check that SQLite's `auto_vacuum` mechanism +is disabled, but the script is not able to protect against a manual `VACUUM` operation +performed either by the administrator or by any automated task that the administrator +may have configured. + Note that the database may take up significantly more (25% - 100% more) space on disk after porting to Postgres. diff --git a/docs/setup/forward_proxy.md b/docs/setup/forward_proxy.md index 494c14893b..3482691f83 100644 --- a/docs/setup/forward_proxy.md +++ b/docs/setup/forward_proxy.md @@ -66,8 +66,8 @@ in Synapse can be deactivated. **NOTE**: This has an impact on security and is for testing purposes only! -To deactivate the certificate validation, the following setting must be made in -[homserver.yaml](../usage/configuration/homeserver_sample_config.md). +To deactivate the certificate validation, the following setting must be added to +your [homserver.yaml](../usage/configuration/homeserver_sample_config.md). ```yaml use_insecure_ssl_client_just_for_testing_do_not_use: true diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 69ade036c3..260e50577b 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -84,20 +84,19 @@ file when you upgrade the Debian package to a later version. ##### Downstream Debian packages -We do not recommend using the packages from the default Debian `buster` -repository at this time, as they are old and suffer from known security -vulnerabilities. You can install the latest version of Synapse from -[our repository](#matrixorg-packages) or from `buster-backports`. Please -see the [Debian documentation](https://backports.debian.org/Instructions/) -for information on how to use backports. - -If you are using Debian `sid` or testing, Synapse is available in the default -repositories and it should be possible to install it simply with: +Andrej Shadura maintains a `matrix-synapse` package in the Debian repositories. +For `bookworm` and `sid`, it can be installed simply with: ```sh sudo apt install matrix-synapse ``` +Synapse is also avaliable in `bullseye-backports`. Please +see the [Debian documentation](https://backports.debian.org/Instructions/) +for information on how to use backports. + +`matrix-synapse` is no longer maintained for `buster` and older. + ##### Downstream Ubuntu packages We do not recommend using the packages in the default Ubuntu repository @@ -233,7 +232,9 @@ python -m synapse.app.homeserver \ --report-stats=[yes|no] ``` -... substituting an appropriate value for `--server-name`. +... substituting an appropriate value for `--server-name` and choosing whether +or not to report usage statistics (hostname, Synapse version, uptime, total +users, etc.) to the developers via the `--report-stats` argument. This command will generate you a config file that you can then customise, but it will also generate a set of keys for you. These keys will allow your homeserver to @@ -406,11 +407,11 @@ The recommended way to do so is to set up a reverse proxy on port Alternatively, you can configure Synapse to expose an HTTPS port. To do so, you will need to edit `homeserver.yaml`, as follows: -- First, under the `listeners` section, uncomment the configuration for the - TLS-enabled listener. (Remove the hash sign (`#`) at the start of - each line). The relevant lines are like this: +- First, under the `listeners` option, add the configuration for the + TLS-enabled listener like so: ```yaml +listeners: - port: 8448 type: http tls: true @@ -418,9 +419,11 @@ so, you will need to edit `homeserver.yaml`, as follows: - names: [client, federation] ``` -- You will also need to uncomment the `tls_certificate_path` and - `tls_private_key_path` lines under the `TLS` section. You will need to manage - provisioning of these certificates yourself. +- You will also need to add the options `tls_certificate_path` and + `tls_private_key_path`. to your configuration file. You will need to manage provisioning of + these certificates yourself. +- You can find more information about these options as well as how to configure synapse in the + [configuration manual](../usage/configuration/config_documentation.md). If you are using your own certificate, be sure to use a `.pem` file that includes the full certificate chain including any intermediate certificates diff --git a/docs/upgrade.md b/docs/upgrade.md index 312f0b87fe..fadb8e7ffb 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,40 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.64.0 + +## Delegation of email validation no longer supported + +As of this version, Synapse no longer allows the tasks of verifying email address +ownership, and password reset confirmation, to be delegated to an identity server. + +To continue to allow users to add email addresses to their homeserver accounts, +and perform password resets, make sure that Synapse is configured with a +working email server in the `email` configuration section (including, at a +minimum, a `notif_from` setting.) + +Specifying an `email` setting under `account_threepid_delegates` will now cause +an error at startup. + +## Changes to the event replication streams + +Synapse now includes a flag indicating if an event is an outlier when +replicating it to other workers. This is a forwards- and backwards-incompatible +change: v1.63 and workers cannot process events replicated by v1.64 workers, and +vice versa. + +Once all workers are upgraded to v1.64 (or downgraded to v1.63), event +replication will resume as normal. + +## frozendict release + +[frozendict 2.3.3](https://github.com/Marco-Sulla/python-frozendict/releases/tag/v2.3.3) +has recently been released, which fixes a memory leak that occurs during `/sync` +requests. We advise server administrators who installed Synapse via pip to upgrade +frozendict with `pip install --upgrade frozendict`. The Docker image +`matrixdotorg/synapse` and the Debian packages from `packages.matrix.org` already +include the updated library. + # Upgrading to v1.62.0 ## New signatures for spam checker callbacks diff --git a/docs/usage/administration/admin_api/README.md b/docs/usage/administration/admin_api/README.md index 3cbedc5dfa..c60b6da0de 100644 --- a/docs/usage/administration/admin_api/README.md +++ b/docs/usage/administration/admin_api/README.md @@ -18,6 +18,11 @@ already on your `$PATH` depending on how Synapse was installed. Finding your user's `access_token` is client-dependent, but will usually be shown in the client's settings. ## Making an Admin API request +For security reasons, we [recommend](reverse_proxy.md#synapse-administration-endpoints) +that the Admin API (`/_synapse/admin/...`) should be hidden from public view using a +reverse proxy. This means you should typically query the Admin API from a terminal on +the machine which runs Synapse. + Once you have your `access_token`, you will need to authenticate each request to an Admin API endpoint by providing the token as either a query parameter or a request header. To add it as a request header in cURL: @@ -25,5 +30,17 @@ providing the token as either a query parameter or a request header. To add it a curl --header "Authorization: Bearer <access_token>" <the_rest_of_your_API_request> ``` +For example, suppose we want to +[query the account](user_admin_api.md#query-user-account) of the user +`@foo:bar.com`. We need an admin access token (e.g. +`syt_AjfVef2_L33JNpafeif_0feKJfeaf0CQpoZk`), and we need to know which port +Synapse's [`client` listener](config_documentation.md#listeners) is listening +on (e.g. `8008`). Then we can use the following command to request the account +information from the Admin API. + +```sh +curl --header "Authorization: Bearer syt_AjfVef2_L33JNpafeif_0feKJfeaf0CQpoZk" -X GET http://127.0.0.1:8008/_synapse/admin/v2/users/@foo:bar.com +``` + For more details on access tokens in Matrix, please refer to the complete [matrix spec documentation](https://matrix.org/docs/spec/client_server/r0.6.1#using-access-tokens). diff --git a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md new file mode 100644 index 0000000000..4e53f9883a --- /dev/null +++ b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md @@ -0,0 +1,81 @@ +# Reporting Homeserver Usage Statistics + +When generating your Synapse configuration file, you are asked whether you +would like to report usage statistics to Matrix.org. These statistics +provide the foundation a glimpse into the number of Synapse homeservers +participating in the network, as well as statistics such as the number of +rooms being created and messages being sent. This feature is sometimes +affectionately called "phone home" stats. Reporting +[is optional](../../configuration/config_documentation.md#report_stats) +and the reporting endpoint +[can be configured](../../configuration/config_documentation.md#report_stats_endpoint), +in case you would like to instead report statistics from a set of homeservers +to your own infrastructure. + +This documentation aims to define the statistics available and the +homeserver configuration options that exist to tweak it. + +## Available Statistics + +The following statistics are sent to the configured reporting endpoint: + +| Statistic Name | Type | Description | +|----------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `homeserver` | string | The homeserver's server name. | +| `memory_rss` | int | The memory usage of the process (in kilobytes on Unix-based systems, bytes on MacOS). | +| `cpu_average` | int | CPU time in % of a single core (not % of all cores). | +| `server_context` | string | An arbitrary string used to group statistics from a set of homeservers. | +| `timestamp` | int | The current time, represented as the number of seconds since the epoch. | +| `uptime_seconds` | int | The number of seconds since the homeserver was last started. | +| `python_version` | string | The Python version number in use (e.g "3.7.1"). Taken from `sys.version_info`. | +| `total_users` | int | The number of registered users on the homeserver. | +| `total_nonbridged_users` | int | The number of users, excluding those created by an Application Service. | +| `daily_user_type_native` | int | The number of native users created in the last 24 hours. | +| `daily_user_type_guest` | int | The number of guest users created in the last 24 hours. | +| `daily_user_type_bridged` | int | The number of users created by Application Services in the last 24 hours. | +| `total_room_count` | int | The total number of rooms present on the homeserver. | +| `daily_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 24 hours. | +| `monthly_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 30 days. | +| `daily_active_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.message` sent in them in the last 24 hours. | +| `daily_active_e2ee_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.encrypted` sent in them in the last 24 hours. | +| `daily_messages` | int | The number of (state) events with the type `m.room.message` seen in the last 24 hours. | +| `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. | +| `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. | +| `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. | +| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. | +| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. | +| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. | +| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. | +| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. | +| `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. | +| `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. | +| `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. | +| `r30v2_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "electron" (case-insensitive) in the user agent string. | +| `r30v2_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "mozilla" or "gecko" (case-insensitive) in the user agent string. | +| `cache_factor` | int | The configured [`global factor`](../../configuration/config_documentation.md#caching) value for caching. | +| `event_cache_size` | int | The configured [`event_cache_size`](../../configuration/config_documentation.md#caching) value for caching. | +| `database_engine` | string | The database engine that is in use. Either "psycopg2" meaning PostgreSQL is in use, or "sqlite3" for SQLite3. | +| `database_server_version` | string | The version of the database server. Examples being "10.10" for PostgreSQL server version 10.0, and "3.38.5" for SQLite 3.38.5 installed on the system. | +| `log_level` | string | The log level in use. Examples are "INFO", "WARNING", "ERROR", "DEBUG", etc. | + + +[^1]: Native matrix users and guests are always counted. If the +[`track_puppeted_user_ips`](../../configuration/config_documentation.md#track_puppeted_user_ips) +option is set to `true`, "puppeted" users (users that an Application Service have performed +[an action on behalf of](https://spec.matrix.org/v1.3/application-service-api/#identity-assertion)) +will also be counted. Note that an Application Service can "puppet" any user in their +[user namespace](https://spec.matrix.org/v1.3/application-service-api/#registration), +not only users that the Application Service has created. If this happens, the Application Service +will additionally be counted as a user (irrespective of `track_puppeted_user_ips`). + +## Using a Custom Statistics Collection Server + +If statistics reporting is enabled, the endpoint that Synapse sends metrics to is configured by the +[`report_stats_endpoint`](../../configuration/config_documentation.md#report_stats_endpoint) config +option. By default, statistics are sent to Matrix.org. + +If you would like to set up your own statistics collection server and send metrics there, you may +consider using one of the following known implementations: + +* [Matrix.org's Panopticon](https://github.com/matrix-org/panopticon) +* [Famedly's Barad-dûr](https://gitlab.com/famedly/company/devops/services/barad-dur) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 392ae80a75..a10f6662eb 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -67,7 +67,7 @@ apply if you want your config file to be read properly. A few helpful things to enabled: false ``` In this manual, all top-level settings (ones with no indentation) are identified - at the beginning of their section (i.e. "Config option: `example_setting`") and + at the beginning of their section (i.e. "### `example_setting`") and the sub-options, if any, are identified and listed in the body of the section. In addition, each setting has an example of its usage, with the proper indentation shown. @@ -124,7 +124,7 @@ documentation on how to configure or create custom modules for Synapse. --- -Config option: `modules` +### `modules` Use the `module` sub-option to add modules under this option to extend functionality. The `module` setting then has a sub-option, `config`, which can be used to define some configuration @@ -147,7 +147,7 @@ modules: Define your homeserver name and other base options. --- -Config option: `server_name` +### `server_name` This sets the public-facing domain of the server. @@ -177,7 +177,7 @@ Example configuration #2: server_name: localhost:8080 ``` --- -Config option: `pid_file` +### `pid_file` When running Synapse as a daemon, the file to store the pid in. Defaults to none. @@ -186,7 +186,7 @@ Example configuration: pid_file: DATADIR/homeserver.pid ``` --- -Config option: `web_client_location` +### `web_client_location` The absolute URL to the web client which `/` will redirect to. Defaults to none. @@ -195,7 +195,7 @@ Example configuration: web_client_location: https://riot.example.com/ ``` --- -Config option: `public_baseurl` +### `public_baseurl` The public-facing base URL that clients use to access this Homeserver (not including _matrix/...). This is the same URL a user might enter into the @@ -211,7 +211,7 @@ Example configuration: public_baseurl: https://example.com/ ``` --- -Config option: `serve_server_wellknown` +### `serve_server_wellknown` By default, other servers will try to reach our server on port 8448, which can be inconvenient in some environments. @@ -230,7 +230,25 @@ Example configuration: serve_server_wellknown: true ``` --- -Config option: `soft_file_limit` +### `extra_well_known_client_content ` + +This option allows server runners to add arbitrary key-value pairs to the [client-facing `.well-known` response](https://spec.matrix.org/latest/client-server-api/#well-known-uri). +Note that the `public_baseurl` config option must be provided for Synapse to serve a response to `/.well-known/matrix/client` at all. + +If this option is provided, it parses the given yaml to json and +serves it on `/.well-known/matrix/client` endpoint +alongside the standard properties. + +*Added in Synapse 1.62.0.* + +Example configuration: +```yaml +extra_well_known_client_content : + option1: value1 + option2: value2 +``` +--- +### `soft_file_limit` Set the soft limit on the number of file descriptors synapse can use. Zero is used to indicate synapse should set the soft limit to the hard limit. @@ -241,7 +259,7 @@ Example configuration: soft_file_limit: 3 ``` --- -Config option: `presence` +### `presence` Presence tracking allows users to see the state (e.g online/offline) of other local and remote users. Set the `enabled` sub-option to false to @@ -254,7 +272,7 @@ presence: enabled: false ``` --- -Config option: `require_auth_for_profile_requests` +### `require_auth_for_profile_requests` Whether to require authentication to retrieve profile data (avatars, display names) of other users through the client API. Defaults to false. Note that profile data is also available @@ -265,7 +283,7 @@ Example configuration: require_auth_for_profile_requests: true ``` --- -Config option: `limit_profile_requests_to_users_who_share_rooms` +### `limit_profile_requests_to_users_who_share_rooms` Use this option to require a user to share a room with another user in order to retrieve their profile information. Only checked on Client-Server @@ -277,7 +295,7 @@ Example configuration: limit_profile_requests_to_users_who_share_rooms: true ``` --- -Config option: `include_profile_data_on_invite` +### `include_profile_data_on_invite` Use this option to prevent a user's profile data from being retrieved and displayed in a room until they have joined it. By default, a user's @@ -290,7 +308,7 @@ Example configuration: include_profile_data_on_invite: false ``` --- -Config option: `allow_public_rooms_without_auth` +### `allow_public_rooms_without_auth` If set to true, removes the need for authentication to access the server's public rooms directory through the client API, meaning that anyone can @@ -301,7 +319,7 @@ Example configuration: allow_public_rooms_without_auth: true ``` --- -Config option: `allow_public_rooms_without_auth` +### `allow_public_rooms_over_federation` If set to true, allows any other homeserver to fetch the server's public rooms directory via federation. Defaults to false. @@ -311,7 +329,7 @@ Example configuration: allow_public_rooms_over_federation: true ``` --- -Config option: `default_room_version` +### `default_room_version` The default room version for newly created rooms on this server. @@ -327,7 +345,7 @@ Example configuration: default_room_version: "8" ``` --- -Config option: `gc_thresholds` +### `gc_thresholds` The garbage collection threshold parameters to pass to `gc.set_threshold`, if defined. Defaults to none. @@ -337,7 +355,7 @@ Example configuration: gc_thresholds: [700, 10, 10] ``` --- -Config option: `gc_min_interval` +### `gc_min_interval` The minimum time in seconds between each GC for a generation, regardless of the GC thresholds. This ensures that we don't do GC too frequently. A value of `[1s, 10s, 30s]` @@ -350,7 +368,7 @@ Example configuration: gc_min_interval: [0.5s, 30s, 1m] ``` --- -Config option: `filter_timeline_limit` +### `filter_timeline_limit` Set the limit on the returned events in the timeline in the get and sync operations. Defaults to 100. A value of -1 means no upper limit. @@ -361,7 +379,7 @@ Example configuration: filter_timeline_limit: 5000 ``` --- -Config option: `block_non_admin_invites` +### `block_non_admin_invites` Whether room invites to users on this server should be blocked (except those sent by local server admins). Defaults to false. @@ -371,7 +389,7 @@ Example configuration: block_non_admin_invites: true ``` --- -Config option: `enable_search` +### `enable_search` If set to false, new messages will not be indexed for searching and users will receive errors when searching for messages. Defaults to true. @@ -381,7 +399,7 @@ Example configuration: enable_search: false ``` --- -Config option: `ip_range_blacklist` +### `ip_range_blacklist` This option prevents outgoing requests from being sent to the specified blacklisted IP address CIDR ranges. If this option is not specified then it defaults to private IP @@ -421,7 +439,7 @@ ip_range_blacklist: - 'fec0::/10' ``` --- -Config option: `ip_range_whitelist` +### `ip_range_whitelist` List of IP address CIDR ranges that should be allowed for federation, identity servers, push servers, and for checking key validity for @@ -438,7 +456,7 @@ ip_range_whitelist: - '192.168.1.1' ``` --- -Config option: `listeners` +### `listeners` List of ports that Synapse should listen on, their purpose and their configuration. @@ -539,7 +557,7 @@ listeners: type: manhole ``` --- -Config option: `manhole_settings` +### `manhole_settings` Connection settings for the manhole. You can find more information on the manhole [here](../../manhole.md). Manhole sub-options include: @@ -558,7 +576,7 @@ manhole_settings: ssh_pub_key_path: CONFDIR/id_rsa.pub ``` --- -Config option: `dummy_events_threshold` +### `dummy_events_threshold` Forward extremities can build up in a room due to networking delays between homeservers. Once this happens in a large room, calculation of the state of @@ -575,7 +593,7 @@ Example configuration: dummy_events_threshold: 5 ``` --- -Config option `delete_stale_devices_after` +### `delete_stale_devices_after` An optional duration. If set, Synapse will run a daily background task to log out and delete any device that hasn't been accessed for more than the specified amount of time. @@ -592,7 +610,7 @@ Useful options for Synapse admins. --- -Config option: `admin_contact` +### `admin_contact` How to reach the server admin, used in `ResourceLimitError`. Defaults to none. @@ -601,7 +619,7 @@ Example configuration: admin_contact: 'mailto:admin@server.com' ``` --- -Config option: `hs_disabled` and `hs_disabled_message` +### `hs_disabled` and `hs_disabled_message` Blocks users from connecting to the homeserver and provides a human-readable reason why the connection was blocked. Defaults to false. @@ -612,7 +630,7 @@ hs_disabled: true hs_disabled_message: 'Reason for why the HS is blocked' ``` --- -Config option: `limit_usage_by_mau` +### `limit_usage_by_mau` This option disables/enables monthly active user blocking. Used in cases where the admin or server owner wants to limit to the number of monthly active users. When enabled and a limit is @@ -624,7 +642,7 @@ Example configuration: limit_usage_by_mau: true ``` --- -Config option: `max_mau_value` +### `max_mau_value` This option sets the hard limit of monthly active users above which the server will start blocking user actions if `limit_usage_by_mau` is enabled. Defaults to 0. @@ -634,7 +652,7 @@ Example configuration: max_mau_value: 50 ``` --- -Config option: `mau_trial_days` +### `mau_trial_days` The option `mau_trial_days` is a means to add a grace period for active users. It means that users must be active for the specified number of days before they @@ -647,7 +665,7 @@ Example configuration: mau_trial_days: 5 ``` --- -Config option: `mau_appservice_trial_days` +### `mau_appservice_trial_days` The option `mau_appservice_trial_days` is similar to `mau_trial_days`, but applies a different trial number if the user was registered by an appservice. A value @@ -661,7 +679,7 @@ mau_appservice_trial_days: another_appservice_id: 6 ``` --- -Config option: `mau_limit_alerting` +### `mau_limit_alerting` The option `mau_limit_alerting` is a means of limiting client-side alerting should the mau limit be reached. This is useful for small instances @@ -674,7 +692,7 @@ Example configuration: mau_limit_alerting: false ``` --- -Config option: `mau_stats_only` +### `mau_stats_only` If enabled, the metrics for the number of monthly active users will be populated, however no one will be limited based on these numbers. If `limit_usage_by_mau` @@ -685,7 +703,7 @@ Example configuration: mau_stats_only: true ``` --- -Config option: `mau_limit_reserved_threepids` +### `mau_limit_reserved_threepids` Sometimes the server admin will want to ensure certain accounts are never blocked by mau checking. These accounts are specified by this option. @@ -699,7 +717,7 @@ mau_limit_reserved_threepids: address: 'reserved_user@example.com' ``` --- -Config option: `server_context` +### `server_context` This option is used by phonehome stats to group together related servers. Defaults to none. @@ -709,7 +727,7 @@ Example configuration: server_context: context ``` --- -Config option: `limit_remote_rooms` +### `limit_remote_rooms` When this option is enabled, the room "complexity" will be checked before a user joins a new remote room. If it is above the complexity limit, the server will @@ -733,7 +751,7 @@ limit_remote_rooms: admins_can_join: true ``` --- -Config option: `require_membership_for_aliases` +### `require_membership_for_aliases` Whether to require a user to be in the room to add an alias to it. Defaults to true. @@ -743,7 +761,7 @@ Example configuration: require_membership_for_aliases: false ``` --- -Config option: `allow_per_room_profiles` +### `allow_per_room_profiles` Whether to allow per-room membership profiles through the sending of membership events with profile information that differs from the target's global profile. @@ -754,7 +772,7 @@ Example configuration: allow_per_room_profiles: false ``` --- -Config option: `max_avatar_size` +### `max_avatar_size` The largest permissible file size in bytes for a user avatar. Defaults to no restriction. Use M for MB and K for KB. @@ -766,7 +784,7 @@ Example configuration: max_avatar_size: 10M ``` --- -Config option: `allowed_avatar_mimetypes` +### `allowed_avatar_mimetypes` The MIME types allowed for user avatars. Defaults to no restriction. @@ -778,7 +796,7 @@ Example configuration: allowed_avatar_mimetypes: ["image/png", "image/jpeg", "image/gif"] ``` --- -Config option: `redaction_retention_period` +### `redaction_retention_period` How long to keep redacted events in unredacted form in the database. After this period redacted events get replaced with their redacted form in the DB. @@ -790,7 +808,7 @@ Example configuration: redaction_retention_period: 28d ``` --- -Config option: `user_ips_max_age` +### `user_ips_max_age` How long to track users' last seen time and IPs in the database. @@ -801,7 +819,7 @@ Example configuration: user_ips_max_age: 14d ``` --- -Config option: `request_token_inhibit_3pid_errors` +### `request_token_inhibit_3pid_errors` Inhibits the `/requestToken` endpoints from returning an error that might leak information about whether an e-mail address is in use or not on this @@ -816,7 +834,7 @@ Example configuration: request_token_inhibit_3pid_errors: true ``` --- -Config option: `next_link_domain_whitelist` +### `next_link_domain_whitelist` A list of domains that the domain portion of `next_link` parameters must match. @@ -838,7 +856,7 @@ Example configuration: next_link_domain_whitelist: ["matrix.org"] ``` --- -Config option: `templates` and `custom_template_directory` +### `templates` and `custom_template_directory` These options define templates to use when generating email or HTML page contents. The `custom_template_directory` determines which directory Synapse will try to @@ -855,7 +873,7 @@ templates: custom_template_directory: /path/to/custom/templates/ ``` --- -Config option: `retention` +### `retention` This option and the associated options determine message retention policy at the server level. @@ -934,7 +952,7 @@ retention: Options related to TLS. --- -Config option: `tls_certificate_path` +### `tls_certificate_path` This option specifies a PEM-encoded X509 certificate for TLS. This certificate, as of Synapse 1.0, will need to be a valid and verifiable @@ -949,7 +967,7 @@ Example configuration: tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt" ``` --- -Config option: `tls_private_key_path` +### `tls_private_key_path` PEM-encoded private key for TLS. Defaults to none. @@ -958,7 +976,7 @@ Example configuration: tls_private_key_path: "CONFDIR/SERVERNAME.tls.key" ``` --- -Config option: `federation_verify_certificates` +### `federation_verify_certificates` Whether to verify TLS server certificates for outbound federation requests. Defaults to true. To disable certificate verification, set the option to false. @@ -968,7 +986,7 @@ Example configuration: federation_verify_certificates: false ``` --- -Config option: `federation_client_minimum_tls_version` +### `federation_client_minimum_tls_version` The minimum TLS version that will be used for outbound federation requests. @@ -982,7 +1000,7 @@ Example configuration: federation_client_minimum_tls_version: 1.2 ``` --- -Config option: `federation_certificate_verification_whitelist` +### `federation_certificate_verification_whitelist` Skip federation certificate verification on a given whitelist of domains. @@ -1001,7 +1019,7 @@ federation_certificate_verification_whitelist: - "*.onion" ``` --- -Config option: `federation_custom_ca_list` +### `federation_custom_ca_list` List of custom certificate authorities for federation traffic. @@ -1024,7 +1042,7 @@ federation_custom_ca_list: Options related to federation. --- -Config option: `federation_domain_whitelist` +### `federation_domain_whitelist` Restrict federation to the given whitelist of domains. N.B. we recommend also firewalling your federation listener to limit @@ -1040,7 +1058,7 @@ federation_domain_whitelist: - syd.example.com ``` --- -Config option: `federation_metrics_domains` +### `federation_metrics_domains` Report prometheus metrics on the age of PDUs being sent to and received from the given domains. This can be used to give an idea of "delay" on inbound @@ -1056,7 +1074,7 @@ federation_metrics_domains: - example.com ``` --- -Config option: `allow_profile_lookup_over_federation` +### `allow_profile_lookup_over_federation` Set to false to disable profile lookup over federation. By default, the Federation API allows other homeservers to obtain profile data of any user @@ -1067,7 +1085,7 @@ Example configuration: allow_profile_lookup_over_federation: false ``` --- -Config option: `allow_device_name_lookup_over_federation` +### `allow_device_name_lookup_over_federation` Set this option to true to allow device display name lookup over federation. By default, the Federation API prevents other homeservers from obtaining the display names of any user devices @@ -1083,7 +1101,7 @@ allow_device_name_lookup_over_federation: true Options related to caching --- -Config option: `event_cache_size` +### `event_cache_size` The number of events to cache in memory. Not affected by `caches.global_factor`. Defaults to 10K. @@ -1093,7 +1111,7 @@ Example configuration: event_cache_size: 15K ``` --- -Config option: `cache` and associated values +### `cache` and associated values A cache 'factor' is a multiplier that can be applied to each of Synapse's caches in order to increase or decrease the maximum @@ -1137,8 +1155,11 @@ Caching can be configured through the following sub-options: * `sync_response_cache_duration`: Controls how long the results of a /sync request are cached for after a successful response is returned. A higher duration can help clients with intermittent connections, at the cost of higher memory usage. - By default, this is zero, which means that sync responses are not cached - at all. + A value of zero means that sync responses are not cached. + Defaults to 2m. + + *Changed in Synapse 1.62.0*: The default was changed from 0 to 2m. + * `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory usage and cache entry availability. You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu) @@ -1190,7 +1211,7 @@ file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using Config options related to database settings. --- -Config option: `database` +### `database` The `database` setting defines the database that synapse uses to store all of its data. @@ -1241,11 +1262,103 @@ database: cp_max: 10 ``` --- +### `databases` + +The `databases` option allows specifying a mapping between certain database tables and +database host details, spreading the load of a single Synapse instance across multiple +database backends. This is often referred to as "database sharding". This option is only +supported for PostgreSQL database backends. + +**Important note:** This is a supported option, but is not currently used in production by the +Matrix.org Foundation. Proceed with caution and always make backups. + +`databases` is a dictionary of arbitrarily-named database entries. Each entry is equivalent +to the value of the `database` homeserver config option (see above), with the addition of +a `data_stores` key. `data_stores` is an array of strings that specifies the data store(s) +(a defined label for a set of tables) that should be stored on the associated database +backend entry. + +The currently defined values for `data_stores` are: + +* `"state"`: Database that relates to state groups will be stored in this database. + + Specifically, that means the following tables: + * `state_groups` + * `state_group_edges` + * `state_groups_state` + + And the following sequences: + * `state_groups_seq_id` + +* `"main"`: All other database tables and sequences. + +All databases will end up with additional tables used for tracking database schema migrations +and any pending background updates. Synapse will create these automatically on startup when checking for +and/or performing database schema migrations. + +To migrate an existing database configuration (e.g. all tables on a single database) to a different +configuration (e.g. the "main" data store on one database, and "state" on another), do the following: + +1. Take a backup of your existing database. Things can and do go wrong and database corruption is no joke! +2. Ensure all pending database migrations have been applied and background updates have run. The simplest + way to do this is to use the `update_synapse_database` script supplied with your Synapse installation. + + ```sh + update_synapse_database --database-config homeserver.yaml --run-background-updates + ``` + +3. Copy over the necessary tables and sequences from one database to the other. Tables relating to database + migrations, schemas, schema versions and background updates should **not** be copied. + + As an example, say that you'd like to split out the "state" data store from an existing database which + currently contains all data stores. + + Simply copy the tables and sequences defined above for the "state" datastore from the existing database + to the secondary database. As noted above, additional tables will be created in the secondary database + when Synapse is started. + +4. Modify/create the `databases` option in your `homeserver.yaml` to match the desired database configuration. +5. Start Synapse. Check that it starts up successfully and that things generally seem to be working. +6. Drop the old tables that were copied in step 3. + +Only one of the options `database` or `databases` may be specified in your config, but not both. + +Example configuration: + +```yaml +databases: + basement_box: + name: psycopg2 + txn_limit: 10000 + data_stores: ["main"] + args: + user: synapse_user + password: secretpassword + database: synapse_main + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 + + my_other_database: + name: psycopg2 + txn_limit: 10000 + data_stores: ["state"] + args: + user: synapse_user + password: secretpassword + database: synapse_state + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 +``` +--- ## Logging ## Config options related to logging. --- -Config option: `log_config` +### `log_config` This option specifies a yaml python logging config file as described [here](https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema). @@ -1261,7 +1374,7 @@ Each ratelimiting configuration is made of two parameters: - `per_second`: number of requests a client can send per second. - `burst_count`: number of requests a client can send before being throttled. --- -Config option: `rc_message` +### `rc_message` Ratelimiting settings for client messaging. @@ -1276,7 +1389,7 @@ rc_message: burst_count: 15 ``` --- -Config option: `rc_registration` +### `rc_registration` This option ratelimits registration requests based on the client's IP address. It defaults to `per_second: 0.17`, `burst_count: 3`. @@ -1288,7 +1401,7 @@ rc_registration: burst_count: 2 ``` --- -Config option: `rc_registration_token_validity` +### `rc_registration_token_validity` This option checks the validity of registration tokens that ratelimits requests based on the client's IP address. @@ -1301,7 +1414,7 @@ rc_registration_token_validity: burst_count: 6 ``` --- -Config option: `rc_login` +### `rc_login` This option specifies several limits for login: * `address` ratelimits login requests based on the client's IP @@ -1329,7 +1442,7 @@ rc_login: burst_count: 7 ``` --- -Config option: `rc_admin_redaction` +### `rc_admin_redaction` This option sets ratelimiting redactions by room admins. If this is not explicitly set then it uses the same ratelimiting as per `rc_message`. This is useful @@ -1342,7 +1455,7 @@ rc_admin_redaction: burst_count: 50 ``` --- -Config option: `rc_joins` +### `rc_joins` This option allows for ratelimiting number of rooms a user can join. This setting has the following sub-options: @@ -1364,7 +1477,26 @@ rc_joins: burst_count: 12 ``` --- -Config option: `rc_3pid_validation` +### `rc_joins_per_room` + +This option allows admins to ratelimit joins to a room based on the number of recent +joins (local or remote) to that room. It is intended to mitigate mass-join spam +waves which target multiple homeservers. + +By default, one join is permitted to a room every second, with an accumulating +buffer of up to ten instantaneous joins. + +Example configuration (default values): +```yaml +rc_joins_per_room: + per_second: 1 + burst_count: 10 +``` + +_Added in Synapse 1.64.0._ + +--- +### `rc_3pid_validation` This option ratelimits how often a user or IP can attempt to validate a 3PID. Defaults to `per_second: 0.003`, `burst_count: 5`. @@ -1376,7 +1508,7 @@ rc_3pid_validation: burst_count: 5 ``` --- -Config option: `rc_invites` +### `rc_invites` This option sets ratelimiting how often invites can be sent in a room or to a specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10` and @@ -1396,6 +1528,10 @@ The `rc_invites.per_user` limit applies to the *receiver* of the invite, rather sender, meaning that a `rc_invite.per_user.burst_count` of 5 mandates that a single user cannot *receive* more than a burst of 5 invites at a time. +In contrast, the `rc_invites.per_issuer` limit applies to the *issuer* of the invite, meaning that a `rc_invite.per_issuer.burst_count` of 5 mandates that single user cannot *send* more than a burst of 5 invites at a time. + +_Changed in version 1.63:_ added the `per_issuer` limit. + Example configuration: ```yaml rc_invites: @@ -1405,9 +1541,13 @@ rc_invites: per_user: per_second: 0.004 burst_count: 3 + per_issuer: + per_second: 0.5 + burst_count: 5 ``` + --- -Config option: `rc_third_party_invite` +### `rc_third_party_invite` This option ratelimits 3PID invites (i.e. invites sent to a third-party ID such as an email address or a phone number) based on the account that's @@ -1420,7 +1560,7 @@ rc_third_party_invite: burst_count: 10 ``` --- -Config option: `rc_federation` +### `rc_federation` Defines limits on federation requests. @@ -1445,7 +1585,7 @@ rc_federation: concurrent: 5 ``` --- -Config option: `federation_rr_transactions_per_room_per_second` +### `federation_rr_transactions_per_room_per_second` Sets outgoing federation transaction frequency for sending read-receipts, per-room. @@ -1462,7 +1602,7 @@ federation_rr_transactions_per_room_per_second: 40 Config options related to Synapse's media store. --- -Config option: `enable_media_repo` +### `enable_media_repo` Enable the media store service in the Synapse master. Defaults to true. Set to false if you are using a separate media store worker. @@ -1472,7 +1612,7 @@ Example configuration: enable_media_repo: false ``` --- -Config option: `media_store_path` +### `media_store_path` Directory where uploaded images and attachments are stored. @@ -1481,7 +1621,7 @@ Example configuration: media_store_path: "DATADIR/media_store" ``` --- -Config option: `media_storage_providers` +### `media_storage_providers` Media storage providers allow media to be stored in different locations. Defaults to none. Associated sub-options are: @@ -1502,7 +1642,7 @@ media_storage_providers: directory: /mnt/some/other/directory ``` --- -Config option: `max_upload_size` +### `max_upload_size` The largest allowed upload size in bytes. @@ -1515,7 +1655,7 @@ Example configuration: max_upload_size: 60M ``` --- -Config option: `max_image_pixels` +### `max_image_pixels` Maximum number of pixels that will be thumbnailed. Defaults to 32M. @@ -1524,7 +1664,7 @@ Example configuration: max_image_pixels: 35M ``` --- -Config option: `dynamic_thumbnails` +### `dynamic_thumbnails` Whether to generate new thumbnails on the fly to precisely match the resolution requested by the client. If true then whenever @@ -1537,7 +1677,7 @@ Example configuration: dynamic_thumbnails: true ``` --- -Config option: `thumbnail_sizes` +### `thumbnail_sizes` List of thumbnails to precalculate when an image is uploaded. Associated sub-options are: * `width` @@ -1564,7 +1704,7 @@ thumbnail_sizes: method: scale ``` --- -Config option: `media_retention` +### `media_retention` Controls whether local media and entries in the remote media cache (media that is downloaded from other homeservers) should be removed @@ -1596,7 +1736,7 @@ media_retention: remote_media_lifetime: 14d ``` --- -Config option: `url_preview_enabled` +### `url_preview_enabled` This setting determines whether the preview URL API is enabled. It is disabled by default. Set to true to enable. If enabled you must specify a @@ -1607,7 +1747,7 @@ Example configuration: url_preview_enabled: true ``` --- -Config option: `url_preview_ip_range_blacklist` +### `url_preview_ip_range_blacklist` List of IP address CIDR ranges that the URL preview spider is denied from accessing. There are no defaults: you must explicitly @@ -1649,7 +1789,7 @@ url_preview_ip_range_blacklist: - 'fec0::/10' ``` ---- -Config option: `url_preview_ip_range_whitelist` +### `url_preview_ip_range_whitelist` This option sets a list of IP address CIDR ranges that the URL preview spider is allowed to access even if they are specified in `url_preview_ip_range_blacklist`. @@ -1663,7 +1803,7 @@ url_preview_ip_range_whitelist: - '192.168.1.1' ``` --- -Config option: `url_preview_url_blacklist` +### `url_preview_url_blacklist` Optional list of URL matches that the URL preview spider is denied from accessing. You should use `url_preview_ip_range_blacklist` @@ -1709,7 +1849,7 @@ url_preview_url_blacklist: - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' ``` --- -Config option: `max_spider_size` +### `max_spider_size` The largest allowed URL preview spidering size in bytes. Defaults to 10M. @@ -1718,7 +1858,7 @@ Example configuration: max_spider_size: 8M ``` --- -Config option: `url_preview_language` +### `url_preview_language` A list of values for the Accept-Language HTTP header used when downloading webpages during URL preview generation. This allows @@ -1743,7 +1883,7 @@ Example configuration: - '*;q=0.7' ``` ---- -Config option: `oembed` +### `oembed` oEmbed allows for easier embedding content from a website. It can be used for generating URLs previews of services which support it. A default list of oEmbed providers @@ -1764,7 +1904,7 @@ oembed: See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha. --- -Config option: `recaptcha_public_key` +### `recaptcha_public_key` This homeserver's ReCAPTCHA public key. Must be specified if `enable_registration_captcha` is enabled. @@ -1774,7 +1914,7 @@ Example configuration: recaptcha_public_key: "YOUR_PUBLIC_KEY" ``` --- -Config option: `recaptcha_private_key` +### `recaptcha_private_key` This homeserver's ReCAPTCHA private key. Must be specified if `enable_registration_captcha` is enabled. @@ -1784,7 +1924,7 @@ Example configuration: recaptcha_private_key: "YOUR_PRIVATE_KEY" ``` --- -Config option: `enable_registration_captcha` +### `enable_registration_captcha` Set to true to enable ReCaptcha checks when registering, preventing signup unless a captcha is answered. Requires a valid ReCaptcha public/private key. @@ -1795,7 +1935,7 @@ Example configuration: enable_registration_captcha: true ``` --- -Config option: `recaptcha_siteverify_api` +### `recaptcha_siteverify_api` The API endpoint to use for verifying `m.login.recaptcha` responses. Defaults to `https://www.recaptcha.net/recaptcha/api/siteverify`. @@ -1809,7 +1949,7 @@ recaptcha_siteverify_api: "https://my.recaptcha.site" Options related to adding a TURN server to Synapse. --- -Config option: `turn_uris` +### `turn_uris` The public URIs of the TURN server to give to clients. @@ -1818,7 +1958,7 @@ Example configuration: turn_uris: [turn:example.org] ``` --- -Config option: `turn_shared_secret` +### `turn_shared_secret` The shared secret used to compute passwords for the TURN server. @@ -1827,7 +1967,7 @@ Example configuration: turn_shared_secret: "YOUR_SHARED_SECRET" ``` ---- -Config options: `turn_username` and `turn_password` +### `turn_username` and `turn_password` The Username and password if the TURN server needs them and does not use a token. @@ -1837,7 +1977,7 @@ turn_username: "TURNSERVER_USERNAME" turn_password: "TURNSERVER_PASSWORD" ``` --- -Config option: `turn_user_lifetime` +### `turn_user_lifetime` How long generated TURN credentials last. Defaults to 1h. @@ -1846,7 +1986,7 @@ Example configuration: turn_user_lifetime: 2h ``` --- -Config option: `turn_allow_guests` +### `turn_allow_guests` Whether guests should be allowed to use the TURN server. This defaults to true, otherwise VoIP will be unreliable for guests. However, it does introduce a slight security risk as @@ -1862,7 +2002,7 @@ turn_allow_guests: false Registration can be rate-limited using the parameters in the [Ratelimiting](#ratelimiting) section of this manual. --- -Config option: `enable_registration` +### `enable_registration` Enable registration for new users. Defaults to false. It is highly recommended that if you enable registration, you use either captcha, email, or token-based verification to verify that new users are not bots. In order to enable registration @@ -1873,7 +2013,7 @@ Example configuration: enable_registration: true ``` --- -Config option: `enable_registration_without_verification` +### `enable_registration_without_verification` Enable registration without email or captcha verification. Note: this option is *not* recommended, as registration without verification is a known vector for spam and abuse. Defaults to false. Has no effect unless `enable_registration` is also enabled. @@ -1883,7 +2023,7 @@ Example configuration: enable_registration_without_verification: true ``` --- -Config option: `session_lifetime` +### `session_lifetime` Time that a user's session remains valid for, after they log in. @@ -1899,7 +2039,7 @@ Example configuration: session_lifetime: 24h ``` ---- -Config option: `refresh_access_token_lifetime` +### `refresh_access_token_lifetime` Time that an access token remains valid for, if the session is using refresh tokens. @@ -1917,7 +2057,7 @@ Example configuration: refreshable_access_token_lifetime: 10m ``` --- -Config option: `refresh_token_lifetime: 24h` +### `refresh_token_lifetime: 24h` Time that a refresh token remains valid for (provided that it is not exchanged for another one first). @@ -1934,7 +2074,7 @@ Example configuration: refresh_token_lifetime: 24h ``` --- -Config option: `nonrefreshable_access_token_lifetime` +### `nonrefreshable_access_token_lifetime` Time that an access token remains valid for, if the session is NOT using refresh tokens. @@ -1953,7 +2093,7 @@ Example configuration: nonrefreshable_access_token_lifetime: 24h ``` --- -Config option: `registrations_require_3pid` +### `registrations_require_3pid` If this is set, the user must provide all of the specified types of 3PID when registering. @@ -1964,7 +2104,7 @@ registrations_require_3pid: - msisdn ``` --- -Config option: `disable_msisdn_registration` +### `disable_msisdn_registration` Explicitly disable asking for MSISDNs from the registration flow (overrides `registrations_require_3pid` if MSISDNs are set as required). @@ -1974,7 +2114,7 @@ Example configuration: disable_msisdn_registration: true ``` --- -Config option: `allowed_local_3pids` +### `allowed_local_3pids` Mandate that users are only allowed to associate certain formats of 3PIDs with accounts on this server, as specified by the `medium` and `pattern` sub-options. @@ -1990,7 +2130,7 @@ allowed_local_3pids: pattern: '\+44' ``` --- -Config option: `enable_3pid_lookup` +### `enable_3pid_lookup` Enable 3PIDs lookup requests to identity servers from this server. Defaults to true. @@ -1999,7 +2139,7 @@ Example configuration: enable_3pid_lookup: false ``` --- -Config option: `registration_requires_token` +### `registration_requires_token` Require users to submit a token during registration. Tokens can be managed using the admin [API](../administration/admin_api/registration_tokens.md). @@ -2012,7 +2152,7 @@ Example configuration: registration_requires_token: true ``` --- -Config option: `registration_shared_secret` +### `registration_shared_secret` If set, allows registration of standard or admin accounts by anyone who has the shared secret, even if registration is otherwise disabled. @@ -2022,7 +2162,7 @@ Example configuration: registration_shared_secret: <PRIVATE STRING> ``` --- -Config option: `bcrypt_rounds` +### `bcrypt_rounds` Set the number of bcrypt rounds used to generate password hash. Larger numbers increase the work factor needed to generate the hash. @@ -2034,7 +2174,7 @@ Example configuration: bcrypt_rounds: 14 ``` --- -Config option: `allow_guest_access` +### `allow_guest_access` Allows users to register as guests without a password/email/etc, and participate in rooms hosted on this server which have been made @@ -2045,7 +2185,7 @@ Example configuration: allow_guest_access: true ``` --- -Config option: `default_identity_server` +### `default_identity_server` The identity server which we suggest that clients should use when users log in on this server. @@ -2058,36 +2198,32 @@ Example configuration: default_identity_server: https://matrix.org ``` --- -Config option: `account_threepid_delegates` +### `account_threepid_delegates` -Handle threepid (email/phone etc) registration and password resets through a set of -*trusted* identity servers. Note that this allows the configured identity server to -reset passwords for accounts! +Delegate verification of phone numbers to an identity server. -Be aware that if `email` is not set, and SMTP options have not been -configured in the email config block, registration and user password resets via -email will be globally disabled. +When a user wishes to add a phone number to their account, we need to verify that they +actually own that phone number, which requires sending them a text message (SMS). +Currently Synapse does not support sending those texts itself and instead delegates the +task to an identity server. The base URI for the identity server to be used is +specified by the `account_threepid_delegates.msisdn` option. -Additionally, if `msisdn` is not set, registration and password resets via msisdn -will be disabled regardless, and users will not be able to associate an msisdn -identifier to their account. This is due to Synapse currently not supporting -any method of sending SMS messages on its own. +If this is left unspecified, Synapse will not allow users to add phone numbers to +their account. -To enable using an identity server for operations regarding a particular third-party -identifier type, set the value to the URL of that identity server as shown in the -examples below. +(Servers handling the these requests must answer the `/requestToken` endpoints defined +by the Matrix Identity Service API +[specification](https://matrix.org/docs/spec/identity_service/latest).) -Servers handling the these requests must answer the `/requestToken` endpoints defined -by the Matrix Identity Service API [specification](https://matrix.org/docs/spec/identity_service/latest). +*Updated in Synapse 1.64.0*: No longer accepts an `email` option. Example configuration: ```yaml account_threepid_delegates: - email: https://example.com # Delegate email sending to example.com msisdn: http://localhost:8090 # Delegate SMS sending to this local process ``` --- -Config option: `enable_set_displayname` +### `enable_set_displayname` Whether users are allowed to change their displayname after it has been initially set. Useful when provisioning users based on the @@ -2100,7 +2236,7 @@ Example configuration: enable_set_displayname: false ``` --- -Config option: `enable_set_avatar_url` +### `enable_set_avatar_url` Whether users are allowed to change their avatar after it has been initially set. Useful when provisioning users based on the contents @@ -2113,7 +2249,7 @@ Example configuration: enable_set_avatar_url: false ``` --- -Config option: `enable_3pid_changes` +### `enable_3pid_changes` Whether users can change the third-party IDs associated with their accounts (email address and msisdn). @@ -2125,7 +2261,7 @@ Example configuration: enable_3pid_changes: false ``` --- -Config option: `auto_join_rooms` +### `auto_join_rooms` Users who register on this homeserver will automatically be joined to the rooms listed under this option. @@ -2143,7 +2279,7 @@ auto_join_rooms: - "#anotherexampleroom:example.com" ``` --- -Config option: `autocreate_auto_join_rooms` +### `autocreate_auto_join_rooms` Where `auto_join_rooms` are specified, setting this flag ensures that the rooms exist by creating them when the first user on the @@ -2163,7 +2299,7 @@ Example configuration: autocreate_auto_join_rooms: false ``` --- -Config option: `autocreate_auto_join_rooms_federated` +### `autocreate_auto_join_rooms_federated` Whether the rooms listen in `auto_join_rooms` that are auto-created are available via federation. Only has an effect if `autocreate_auto_join_rooms` is true. @@ -2180,7 +2316,7 @@ Example configuration: autocreate_auto_join_rooms_federated: false ``` --- -Config option: `autocreate_auto_join_room_preset` +### `autocreate_auto_join_room_preset` The room preset to use when auto-creating one of `auto_join_rooms`. Only has an effect if `autocreate_auto_join_rooms` is true. @@ -2202,7 +2338,7 @@ Example configuration: autocreate_auto_join_room_preset: private_chat ``` --- -Config option: `auto_join_mxid_localpart` +### `auto_join_mxid_localpart` The local part of the user id which is used to create `auto_join_rooms` if `autocreate_auto_join_rooms` is true. If this is not provided then the @@ -2226,7 +2362,7 @@ Example configuration: auto_join_mxid_localpart: system ``` --- -Config option: `auto_join_rooms_for_guests` +### `auto_join_rooms_for_guests` When `auto_join_rooms` is specified, setting this flag to false prevents guest accounts from being automatically joined to the rooms. @@ -2238,7 +2374,7 @@ Example configuration: auto_join_rooms_for_guests: false ``` --- -Config option: `inhibit_user_in_use_error` +### `inhibit_user_in_use_error` Whether to inhibit errors raised when registering a new account if the user ID already exists. If turned on, requests to `/register/available` will always @@ -2257,7 +2393,7 @@ inhibit_user_in_use_error: true Config options related to metrics. --- -Config option: `enable_metrics` +### `enable_metrics` Set to true to enable collection and rendering of performance metrics. Defaults to false. @@ -2267,7 +2403,7 @@ Example configuration: enable_metrics: true ``` --- -Config option: `sentry` +### `sentry` Use this option to enable sentry integration. Provide the DSN assigned to you by sentry with the `dsn` setting. @@ -2284,7 +2420,7 @@ sentry: dsn: "..." ``` --- -Config option: `metrics_flags` +### `metrics_flags` Flags to enable Prometheus metrics which are not suitable to be enabled by default, either for performance reasons or limited use. @@ -2299,20 +2435,25 @@ metrics_flags: known_servers: true ``` --- -Config option: `report_stats` +### `report_stats` -Whether or not to report anonymized homeserver usage statistics. This is originally +Whether or not to report homeserver usage statistics. This is originally set when generating the config. Set this option to true or false to change the current -behavior. +behavior. See +[Reporting Homeserver Usage Statistics](../administration/monitoring/reporting_homeserver_usage_statistics.md) +for information on what data is reported. + +Statistics will be reported 5 minutes after Synapse starts, and then every 3 hours +after that. Example configuration: ```yaml report_stats: true ``` --- -Config option: `report_stats_endpoint` +### `report_stats_endpoint` -The endpoint to report the anonymized homeserver usage statistics to. +The endpoint to report homeserver usage statistics to. Defaults to https://matrix.org/report-usage-stats/push Example configuration: @@ -2324,7 +2465,7 @@ report_stats_endpoint: https://example.com/report-usage-stats/push Config settings related to the client/server API --- -Config option: `room_prejoin_state:` +### `room_prejoin_state:` Controls for the state that is shared with users who receive an invite to a room. By default, the following state event types are shared with users who @@ -2353,7 +2494,7 @@ room_prejoin_state: - m.room.join_rules ``` --- -Config option: `track_puppeted_user_ips` +### `track_puppeted_user_ips` We record the IP address of clients used to access the API for various reasons, including displaying it to the user in the "Where you're signed in" @@ -2373,7 +2514,7 @@ Example configuration: track_puppeted_user_ips: true ``` --- -Config option: `app_service_config_files` +### `app_service_config_files` A list of application service config files to use. @@ -2384,7 +2525,7 @@ app_service_config_files: - app_service_2.yaml ``` --- -Config option: `track_appservice_user_ips` +### `track_appservice_user_ips` Defaults to false. Set to true to enable tracking of application service IP addresses. Implicitly enables MAU tracking for application service users. @@ -2394,7 +2535,7 @@ Example configuration: track_appservice_user_ips: true ``` --- -Config option: `macaroon_secret_key` +### `macaroon_secret_key` A secret which is used to sign access tokens. If none is specified, the `registration_shared_secret` is used, if one is given; otherwise, @@ -2405,7 +2546,7 @@ Example configuration: macaroon_secret_key: <PRIVATE STRING> ``` --- -Config option: `form_secret` +### `form_secret` A secret which is used to calculate HMACs for form values, to stop falsification of values. Must be specified for the User Consent @@ -2420,7 +2561,7 @@ form_secret: <PRIVATE STRING> Config options relating to signing keys --- -Config option: `signing_key_path` +### `signing_key_path` Path to the signing key to sign messages with. @@ -2429,7 +2570,7 @@ Example configuration: signing_key_path: "CONFDIR/SERVERNAME.signing.key" ``` --- -Config option: `old_signing_keys` +### `old_signing_keys` The keys that the server used to sign messages with but won't use to sign new messages. For each key, `key` should be the base64-encoded public key, and @@ -2445,7 +2586,7 @@ old_signing_keys: "ed25519:id": { key: "base64string", expired_ts: 123456789123 } ``` --- -Config option: `key_refresh_interval` +### `key_refresh_interval` How long key response published by this server is valid for. Used to set the `valid_until_ts` in `/key/v2` APIs. @@ -2457,7 +2598,7 @@ Example configuration: key_refresh_interval: 2d ``` --- -Config option: `trusted_key_servers:` +### `trusted_key_servers:` The trusted servers to download signing keys from. @@ -2500,7 +2641,7 @@ trusted_key_servers: - server_name: "matrix.org" ``` --- -Config option: `suppress_key_server_warning` +### `suppress_key_server_warning` Set the following to true to disable the warning that is emitted when the `trusted_key_servers` include 'matrix.org'. See above. @@ -2510,7 +2651,7 @@ Example configuration: suppress_key_server_warning: true ``` --- -Config option: `key_server_signing_keys_path` +### `key_server_signing_keys_path` The signing keys to use when acting as a trusted key server. If not specified defaults to the server signing key. @@ -2536,7 +2677,7 @@ You will also want to investigate the settings under the "sso" configuration section below. --- -Config option: `saml2_config` +### `saml2_config` Enable SAML2 for registration and login. Uses pysaml2. To learn more about pysaml and to find a full list options for configuring pysaml, read the docs [here](https://pysaml2.readthedocs.io/en/latest/). @@ -2673,7 +2814,7 @@ saml2_config: idp_entityid: 'https://our_idp/entityid' ``` --- -Config option: `oidc_providers` +### `oidc_providers` List of OpenID Connect (OIDC) / OAuth 2.0 identity providers, for registration and login. See [here](../../openid.md) @@ -2861,7 +3002,7 @@ oidc_providers: value: "synapseUsers" ``` --- -Config option: `cas_config` +### `cas_config` Enable Central Authentication Service (CAS) for registration and login. Has the following sub-options: @@ -2887,7 +3028,7 @@ cas_config: department: None ``` --- -Config option: `sso` +### `sso` Additional settings to use with single-sign on systems such as OpenID Connect, SAML2 and CAS. @@ -2924,7 +3065,7 @@ sso: update_profile_information: true ``` --- -Config option: `jwt_config` +### `jwt_config` JSON web token integration. The following settings can be used to make Synapse JSON web tokens for authentication, instead of its internal @@ -2946,8 +3087,10 @@ Additional sub-options for this setting include: tokens. Defaults to false. * `secret`: This is either the private shared secret or the public key used to decode the contents of the JSON web token. Required if `enabled` is set to true. -* `algorithm`: The algorithm used to sign the JSON web token. Supported algorithms are listed at - https://pyjwt.readthedocs.io/en/latest/algorithms.html Required if `enabled` is set to true. +* `algorithm`: The algorithm used to sign (or HMAC) the JSON web token. + Supported algorithms are listed + [here (section JWS)](https://docs.authlib.org/en/latest/specs/rfc7518.html). + Required if `enabled` is set to true. * `subject_claim`: Name of the claim containing a unique identifier for the user. Optional, defaults to `sub`. * `issuer`: The issuer to validate the "iss" claim against. Optional. If provided the @@ -2969,7 +3112,7 @@ jwt_config: - "provided-by-your-issuer" ``` --- -Config option: `password_config` +### `password_config` Use this setting to enable password-based logins. @@ -2981,7 +3124,7 @@ This setting has the following sub-options: * `localdb_enabled`: Set to false to disable authentication against the local password database. This is ignored if `enabled` is false, and is only useful if you have other `password_providers`. Defaults to true. -* `pepper`: Set the value here to a secret random string for extra security. # Uncomment and change to a secret random string for extra security. +* `pepper`: Set the value here to a secret random string for extra security. DO NOT CHANGE THIS AFTER INITIAL SETUP! * `policy`: Define and enforce a password policy, such as minimum lengths for passwords, etc. Each parameter is optional. This is an implementation of MSC2000. Parameters are as follows: @@ -3013,7 +3156,7 @@ password_config: require_uppercase: true ``` --- -Config option: `ui_auth` +### `ui_auth` The amount of time to allow a user-interactive authentication session to be active. @@ -3035,7 +3178,7 @@ ui_auth: session_timeout: "15s" ``` --- -Config option: `email` +### `email` Configuration for sending emails from Synapse. @@ -3138,7 +3281,7 @@ email: Configuration settings related to push notifications --- -Config option: `push` +### `push` This setting defines options for push notifications. @@ -3171,7 +3314,7 @@ push: Config options relating to rooms. --- -Config option: `encryption_enabled_by_default` +### `encryption_enabled_by_default` Controls whether locally-created rooms should be end-to-end encrypted by default. @@ -3193,7 +3336,7 @@ Example configuration: encryption_enabled_by_default_for_room_type: invite ``` --- -Config option: `user_directory` +### `user_directory` This setting defines options related to the user directory. @@ -3224,7 +3367,7 @@ user_directory: prefer_local_users: true ``` --- -Config option: `user_consent` +### `user_consent` For detailed instructions on user consent configuration, see [here](../../consent_tracking.md). @@ -3275,7 +3418,7 @@ user_consent: policy_name: Privacy Policy ``` --- -Config option: `stats` +### `stats` Settings for local room and user statistics collection. See [here](../../room_and_user_statistics.md) for more. @@ -3290,7 +3433,7 @@ stats: enabled: false ``` --- -Config option: `server_notices` +### `server_notices` Use this setting to enable a room which can be used to send notices from the server to users. It is a special room which users cannot leave; notices @@ -3314,7 +3457,7 @@ server_notices: room_name: "Server Notices" ``` --- -Config option: `enable_room_list_search` +### `enable_room_list_search` Set to false to disable searching the public room list. When disabled blocks searching local and remote room lists for local and remote @@ -3325,7 +3468,7 @@ Example configuration: enable_room_list_search: false ``` --- -Config option: `alias_creation` +### `alias_creation` The `alias_creation` option controls who is allowed to create aliases on this server. @@ -3355,7 +3498,7 @@ alias_creation_rules: action: deny ``` --- -Config options: `room_list_publication_rules` +### `room_list_publication_rules` The `room_list_publication_rules` option controls who can publish and which rooms can be published in the public room list. @@ -3386,7 +3529,7 @@ room_list_publication_rules: ``` --- -Config option: `default_power_level_content_override` +### `default_power_level_content_override` The `default_power_level_content_override` option controls the default power levels for rooms. @@ -3415,7 +3558,7 @@ default_power_level_content_override: Configuration options related to Opentracing support. --- -Config option: `opentracing` +### `opentracing` These settings enable and configure opentracing, which implements distributed tracing. This allows you to observe the causal chains of events across servers @@ -3458,7 +3601,7 @@ opentracing: Configuration options related to workers. --- -Config option: `send_federation` +### `send_federation` Controls sending of outbound federation transactions on the main process. Set to false if using a federation sender worker. Defaults to true. @@ -3468,7 +3611,7 @@ Example configuration: send_federation: false ``` --- -Config option: `federation_sender_instances` +### `federation_sender_instances` It is possible to run multiple federation sender workers, in which case the work is balanced across them. Use this setting to list the senders. @@ -3484,7 +3627,7 @@ federation_sender_instances: - federation_sender1 ``` --- -Config option: `instance_map` +### `instance_map` When using workers this should be a map from worker name to the HTTP replication listener of the worker, if configured. @@ -3497,7 +3640,7 @@ instance_map: port: 8034 ``` --- -Config option: `stream_writers` +### `stream_writers` Experimental: When using workers you can define which workers should handle event persistence and typing notifications. Any worker @@ -3510,7 +3653,7 @@ stream_writers: typing: worker1 ``` --- -Config option: `run_background_tasks_on` +### `run_background_tasks_on` The worker that is used to run background tasks (e.g. cleaning up expired data). If not provided this defaults to the main process. @@ -3520,7 +3663,7 @@ Example configuration: run_background_tasks_on: worker1 ``` --- -Config option: `worker_replication_secret` +### `worker_replication_secret` A shared secret used by the replication APIs to authenticate HTTP requests from workers. @@ -3531,7 +3674,7 @@ Example configuration: ```yaml worker_replication_secret: "secret_secret" ``` -Config option: `redis` +### `redis` Configuration for Redis when using workers. This *must* be enabled when using workers (unless using old style direct TCP configuration). @@ -3553,7 +3696,7 @@ redis: Configuration settings related to background updates. --- -Config option: `background_updates` +### `background_updates` Background updates are database updates that are run in the background in batches. The duration, minimum batch size, default batch size, whether to sleep between batches and if so, how long to @@ -3578,3 +3721,4 @@ background_updates: min_batch_size: 10 default_batch_size: 50 ``` + diff --git a/docs/usage/configuration/homeserver_sample_config.md b/docs/usage/configuration/homeserver_sample_config.md index 11e806998d..2dbfb35baa 100644 --- a/docs/usage/configuration/homeserver_sample_config.md +++ b/docs/usage/configuration/homeserver_sample_config.md @@ -9,6 +9,9 @@ a real homeserver.yaml. Instead, if you are starting from scratch, please genera a fresh config using Synapse by following the instructions in [Installation](../../setup/installation.md). +Documentation for all configuration options can be found in the +[Configuration Manual](./config_documentation.md). + ```yaml {{#include ../../sample_config.yaml}} ``` diff --git a/docs/usage/configuration/user_authentication/single_sign_on/cas.md b/docs/usage/configuration/user_authentication/single_sign_on/cas.md index 3bac1b29f0..899face876 100644 --- a/docs/usage/configuration/user_authentication/single_sign_on/cas.md +++ b/docs/usage/configuration/user_authentication/single_sign_on/cas.md @@ -4,5 +4,5 @@ Synapse supports authenticating users via the [Central Authentication Service protocol](https://en.wikipedia.org/wiki/Central_Authentication_Service) (CAS) natively. -Please see the `cas_config` and `sso` sections of the [Synapse configuration -file](../../../configuration/homeserver_sample_config.md) for more details. \ No newline at end of file +Please see the [cas_config](../../../configuration/config_documentation.md#cas_config) and [sso](../../../configuration/config_documentation.md#sso) +sections of the configuration manual for more details. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 0c01452cc7..2ebadadbfb 100644 --- a/mypy.ini +++ b/mypy.ini @@ -56,7 +56,6 @@ exclude = (?x) |tests/server.py |tests/server_notices/test_resource_limits_server_notices.py |tests/test_metrics.py - |tests/test_server.py |tests/test_state.py |tests/test_terms_auth.py |tests/util/caches/test_cached_call.py @@ -74,7 +73,6 @@ exclude = (?x) |tests/util/test_lrucache.py |tests/util/test_rwlock.py |tests/util/test_wheel_timer.py - |tests/utils.py )$ [mypy-synapse.federation.transport.client] @@ -86,12 +84,6 @@ disallow_untyped_defs = False [mypy-synapse.http.matrixfederationclient] disallow_untyped_defs = False -[mypy-synapse.logging.opentracing] -disallow_untyped_defs = False - -[mypy-synapse.logging.scopecontextmanager] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. @@ -113,6 +105,9 @@ disallow_untyped_defs = False [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True +[mypy-tests.test_server] +disallow_untyped_defs = True + [mypy-tests.state.test_profile] disallow_untyped_defs = True @@ -128,6 +123,9 @@ disallow_untyped_defs = True [mypy-tests.federation.transport.test_client] disallow_untyped_defs = True +[mypy-tests.utils] +disallow_untyped_defs = True + ;; Dependencies without annotations ;; Before ignoring a module, check to see if type stubs are available. diff --git a/poetry.lock b/poetry.lock index b8ec2b1a93..da42c07fc5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -840,7 +840,7 @@ python-versions = ">=3.5" name = "pyjwt" version = "2.4.0" description = "JSON Web Token implementation in Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" @@ -1571,9 +1571,9 @@ docs = ["sphinx", "repoze.sphinx.autointerface"] test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"] [extras] -all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "pyjwt", "txredisapi", "hiredis", "Pympler"] +all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler"] cache_memory = ["Pympler"] -jwt = ["pyjwt"] +jwt = ["authlib"] matrix-synapse-ldap3 = ["matrix-synapse-ldap3"] oidc = ["authlib"] opentracing = ["jaeger-client", "opentracing"] @@ -1588,7 +1588,7 @@ url_preview = ["lxml"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "dc3293c36ad2166c90e58b9a291277cdb76bc57a4930e94f3ca8e4c78340d187" +content-hash = "726ecbbacd7199e4f42517cbcafedeebf560e7fab0d45fc00f1611c9895ee4c2" [metadata.files] attrs = [ diff --git a/pyproject.toml b/pyproject.toml index 5de756d2ea..f2a5235600 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.61.0" +version = "1.63.1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors <packages@matrix.org>"] license = "Apache-2.0" @@ -110,9 +110,11 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -canonicaljson = ">=1.4.0" +# We require 1.5.0 to work around an issue when running against the C implementation of +# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 +canonicaljson = "^1.5.0" # we use the type definitions added in signedjson 1.1. -signedjson = ">=1.1.0" +signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. service-identity = ">=18.1.0" # Twisted 18.9 introduces some logger improvements that the structured @@ -150,7 +152,7 @@ typing-extensions = ">=3.10.0.1" cryptography = ">=3.4.7" # ijson 3.1.4 fixes a bug with "." in property names ijson = ">=3.1.4" -matrix-common = "~=1.2.1" +matrix-common = "^1.2.1" # We need packaging.requirements.Requirement, added in 16.1. packaging = ">=16.1" # At the time of writing, we only use functions from the version `importlib.metadata` @@ -175,7 +177,6 @@ lxml = { version = ">=4.2.0", optional = true } sentry-sdk = { version = ">=0.7.2", optional = true } opentracing = { version = ">=2.2.0", optional = true } jaeger-client = { version = ">=4.0.0", optional = true } -pyjwt = { version = ">=1.6.4", optional = true } txredisapi = { version = ">=1.4.7", optional = true } hiredis = { version = "*", optional = true } Pympler = { version = "*", optional = true } @@ -196,7 +197,7 @@ systemd = ["systemd-python"] url_preview = ["lxml"] sentry = ["sentry-sdk"] opentracing = ["jaeger-client", "opentracing"] -jwt = ["pyjwt"] +jwt = ["authlib"] # hiredis is not a *strict* dependency, but it makes things much faster. # (if it is not installed, we fall back to slow code.) redis = ["txredisapi", "hiredis"] @@ -222,7 +223,7 @@ all = [ "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", # saml2 "pysaml2", - # oidc + # oidc and jwt "authlib", # url_preview "lxml", @@ -230,8 +231,6 @@ all = [ "sentry-sdk", # opentracing "jaeger-client", "opentracing", - # jwt - "pyjwt", # redis "txredisapi", "hiredis", # cache_memory diff --git a/scripts-dev/build_debian_packages.py b/scripts-dev/build_debian_packages.py index 38564893e9..cd2e64b75f 100755 --- a/scripts-dev/build_debian_packages.py +++ b/scripts-dev/build_debian_packages.py @@ -26,7 +26,6 @@ DISTS = ( "debian:bookworm", "debian:sid", "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) - "ubuntu:impish", # 21.10 (EOL 2022-07) "ubuntu:jammy", # 22.04 LTS (EOL 2027-04) ) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index f1843717cb..6381f7092e 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -14,12 +14,18 @@ # By default Synapse is run in monolith mode. This can be overridden by # setting the WORKERS environment variable. # -# A regular expression of test method names can be supplied as the first -# argument to the script. Complement will then only run those tests. If -# no regex is supplied, all tests are run. For example; +# You can optionally give a "-f" argument (for "fast") before any to skip +# rebuilding the docker images, if you just want to rerun the tests. +# +# Remaining commandline arguments are passed through to `go test`. For example, +# you can supply a regular expression of test method names via the "-run" +# argument: # # ./complement.sh -run "TestOutboundFederation(Profile|Send)" # +# Specifying TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 will cause `poetry export` +# to not emit any hashes when building the Docker image. This then means that +# you can use 'unverifiable' sources such as git repositories as dependencies. # Exit if a line returns a non-zero exit code set -e @@ -32,6 +38,47 @@ echo_if_github() { fi } +# Helper to print out the usage instructions +usage() { + cat >&2 <<EOF +Usage: $0 [-f] <go test arguments>... +Run the complement test suite on Synapse. + + -f, --fast + Skip rebuilding the docker images, and just use the most recent + 'complement-synapse:latest' image. + Conflicts with --build-only. + + --build-only + Only build the Docker images. Don't actually run Complement. + Conflicts with -f/--fast. + +For help on arguments to 'go test', run 'go help testflag'. +EOF +} + +# parse our arguments +skip_docker_build="" +skip_complement_run="" +while [ $# -ge 1 ]; do + arg=$1 + case "$arg" in + "-h") + usage + exit 1 + ;; + "-f"|"--fast") + skip_docker_build=1 + ;; + "--build-only") + skip_complement_run=1 + ;; + *) + # unknown arg: presumably an argument to gotest. break the loop. + break + esac + shift +done # enable buildkit for the docker builds export DOCKER_BUILDKIT=1 @@ -49,21 +96,30 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then echo "Checkout available at 'complement-${COMPLEMENT_REF}'" fi -# Build the base Synapse image from the local checkout -echo_if_github "::group::Build Docker image: matrixdotorg/synapse" -docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . -echo_if_github "::endgroup::" - -# Build the workers docker image (from the base Synapse image we just built). -echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" -docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . -echo_if_github "::endgroup::" +if [ -z "$skip_docker_build" ]; then + # Build the base Synapse image from the local checkout + echo_if_github "::group::Build Docker image: matrixdotorg/synapse" + docker build -t matrixdotorg/synapse \ + --build-arg TEST_ONLY_SKIP_DEP_HASH_VERIFICATION \ + -f "docker/Dockerfile" . + echo_if_github "::endgroup::" + + # Build the workers docker image (from the base Synapse image we just built). + echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" + docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . + echo_if_github "::endgroup::" + + # Build the unified Complement image (from the worker Synapse image we just built). + echo_if_github "::group::Build Docker image: complement/Dockerfile" + docker build -t complement-synapse \ + -f "docker/complement/Dockerfile" "docker/complement" + echo_if_github "::endgroup::" +fi -# Build the unified Complement image (from the worker Synapse image we just built). -echo_if_github "::group::Build Docker image: complement/Dockerfile" -docker build -t complement-synapse \ - -f "docker/complement/Dockerfile" "docker/complement" -echo_if_github "::endgroup::" +if [ -n "$skip_complement_run" ]; then + echo "Skipping Complement run as requested." + exit +fi export COMPLEMENT_BASE_IMAGE=complement-synapse @@ -104,6 +160,18 @@ else test_tags="$test_tags,faster_joins" fi + +if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then + # Set the log level to what is desired + export PASS_SYNAPSE_LOG_LEVEL="$SYNAPSE_TEST_LOG_LEVEL" + + # Allow logging sensitive things (currently SQL queries & parameters). + # (This won't have any effect if we're not logging at DEBUG level overall.) + # Since this is just a test suite, this is fine and won't reveal anyone's + # personal information + export PASS_SYNAPSE_LOG_SENSITIVE=1 +fi + # Run the tests! echo "Images built; running complement" cd "$COMPLEMENT_DIR" diff --git a/synapse/_scripts/generate_config.py b/synapse/_scripts/generate_config.py index 08eb8ef114..06c11c60da 100755 --- a/synapse/_scripts/generate_config.py +++ b/synapse/_scripts/generate_config.py @@ -33,7 +33,7 @@ def main() -> None: parser.add_argument( "--report-stats", action="store", - help="Whether the generated config reports anonymized usage statistics", + help="Whether the generated config reports homeserver usage statistics", choices=["yes", "no"], ) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 9c06c837dc..543bba27c2 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -58,9 +58,7 @@ from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateSt from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackgroundStore -from synapse.storage.databases.main.event_push_actions import ( - EventPushActionsWorkerStore, -) +from synapse.storage.databases.main.event_push_actions import EventPushActionsStore from synapse.storage.databases.main.events_bg_updates import ( EventsBackgroundUpdatesStore, ) @@ -168,22 +166,6 @@ IGNORED_TABLES = { "ui_auth_sessions", "ui_auth_sessions_credentials", "ui_auth_sessions_ips", - # Groups/communities is no longer supported. - "group_attestations_remote", - "group_attestations_renewals", - "group_invites", - "group_roles", - "group_room_categories", - "group_rooms", - "group_summary_roles", - "group_summary_room_categories", - "group_summary_rooms", - "group_summary_users", - "group_users", - "groups", - "local_group_membership", - "local_group_updates", - "remote_profile_cache", } @@ -202,7 +184,7 @@ R = TypeVar("R") class Store( - EventPushActionsWorkerStore, + EventPushActionsStore, ClientIpBackgroundUpdateStore, DeviceInboxBackgroundUpdateStore, DeviceBackgroundUpdateStore, @@ -270,6 +252,9 @@ class MockHomeserver: def get_instance_name(self) -> str: return "master" + def should_send_federation(self) -> bool: + return False + class Porter: def __init__( @@ -417,12 +402,15 @@ class Porter: self.progress.update(table, table_size) # Mark table as done return + # We sweep over rowids in two directions: one forwards (rowids 1, 2, 3, ...) + # and another backwards (rowids 0, -1, -2, ...). forward_select = ( "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) ) backward_select = ( - "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid LIMIT ?" % (table,) + "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid DESC LIMIT ?" + % (table,) ) do_forward = [True] @@ -620,6 +608,25 @@ class Porter: self.postgres_store.db_pool.updates.has_completed_background_updates() ) + @staticmethod + def _is_sqlite_autovacuum_enabled(txn: LoggingTransaction) -> bool: + """ + Returns true if auto_vacuum is enabled in SQLite. + https://www.sqlite.org/pragma.html#pragma_auto_vacuum + + Vacuuming changes the rowids on rows in the database. + Auto-vacuuming is therefore dangerous when used in conjunction with this script. + + Note that the auto_vacuum setting can't be changed without performing + a VACUUM after trying to change the pragma. + """ + txn.execute("PRAGMA auto_vacuum") + row = txn.fetchone() + assert row is not None, "`PRAGMA auto_vacuum` did not give a row." + (autovacuum_setting,) = row + # 0 means off. 1 means full. 2 means incremental. + return autovacuum_setting != 0 + async def run(self) -> None: """Ports the SQLite database to a PostgreSQL database. @@ -636,6 +643,21 @@ class Porter: allow_outdated_version=True, ) + # For safety, ensure auto_vacuums are disabled. + if await self.sqlite_store.db_pool.runInteraction( + "is_sqlite_autovacuum_enabled", self._is_sqlite_autovacuum_enabled + ): + end_error = ( + "auto_vacuum is enabled in the SQLite database." + " (This is not the default configuration.)\n" + " This script relies on rowids being consistent and must not" + " be used if the database could be vacuumed between re-runs.\n" + " To disable auto_vacuum, you need to stop Synapse and run the following SQL:\n" + " PRAGMA auto_vacuum=off;\n" + " VACUUM;" + ) + return + # Check if all background updates are done, abort if not. updates_complete = ( await self.sqlite_store.db_pool.updates.has_completed_background_updates() diff --git a/synapse/api/constants.py b/synapse/api/constants.py index e1d31cabed..2653764119 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -259,3 +259,13 @@ class ReceiptTypes: READ: Final = "m.read" READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" + + +class PublicRoomsFilterFields: + """Fields in the search filter for `/publicRooms` that we understand. + + As defined in https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3publicrooms + """ + + GENERIC_SEARCH_TERM: Final = "generic_search_term" + ROOM_TYPES: Final = "org.matrix.msc3827.room_types" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cc7b785472..1c74e131f2 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -297,8 +297,14 @@ class AuthError(SynapseError): other poorly-defined times. """ - def __init__(self, code: int, msg: str, errcode: str = Codes.FORBIDDEN): - super().__init__(code, msg, errcode) + def __init__( + self, + code: int, + msg: str, + errcode: str = Codes.FORBIDDEN, + additional_fields: Optional[dict] = None, + ): + super().__init__(code, msg, errcode, additional_fields) class InvalidClientCredentialsError(SynapseError): diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 54d13026c9..f43965c1c8 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -27,6 +27,33 @@ class Ratelimiter: """ Ratelimit actions marked by arbitrary keys. + (Note that the source code speaks of "actions" and "burst_count" rather than + "tokens" and a "bucket_size".) + + This is a "leaky bucket as a meter". For each key to be tracked there is a bucket + containing some number 0 <= T <= `burst_count` of tokens corresponding to previously + permitted requests for that key. Each bucket starts empty, and gradually leaks + tokens at a rate of `rate_hz`. + + Upon an incoming request, we must determine: + - the key that this request falls under (which bucket to inspect), and + - the cost C of this request in tokens. + Then, if there is room in the bucket for C tokens (T + C <= `burst_count`), + the request is permitted and `cost` tokens are added to the bucket. + Otherwise the request is denied, and the bucket continues to hold T tokens. + + This means that the limiter enforces an average request frequency of `rate_hz`, + while accumulating a buffer of up to `burst_count` requests which can be consumed + instantaneously. + + The tricky bit is the leaking. We do not want to have a periodic process which + leaks every bucket! Instead, we track + - the time point when the bucket was last completely empty, and + - how many tokens have added to the bucket permitted since then. + Then for each incoming request, we can calculate how many tokens have leaked + since this time point, and use that to decide if we should accept or reject the + request. + Args: clock: A homeserver clock, for retrieving the current time rate_hz: The long term number of actions that can be performed in a second. @@ -41,14 +68,30 @@ class Ratelimiter: self.burst_count = burst_count self.store = store - # A ordered dictionary keeping track of actions, when they were last - # performed and how often. Each entry is a mapping from a key of arbitrary type - # to a tuple representing: - # * How many times an action has occurred since a point in time - # * The point in time - # * The rate_hz of this particular entry. This can vary per request + # An ordered dictionary representing the token buckets tracked by this rate + # limiter. Each entry maps a key of arbitrary type to a tuple representing: + # * The number of tokens currently in the bucket, + # * The time point when the bucket was last completely empty, and + # * The rate_hz (leak rate) of this particular bucket. self.actions: OrderedDict[Hashable, Tuple[float, float, float]] = OrderedDict() + def _get_key( + self, requester: Optional[Requester], key: Optional[Hashable] + ) -> Hashable: + """Use the requester's MXID as a fallback key if no key is provided.""" + if key is None: + if not requester: + raise ValueError("Must supply at least one of `requester` or `key`") + + key = requester.user.to_string() + return key + + def _get_action_counts( + self, key: Hashable, time_now_s: float + ) -> Tuple[float, float, float]: + """Retrieve the action counts, with a fallback representing an empty bucket.""" + return self.actions.get(key, (0.0, time_now_s, 0.0)) + async def can_do_action( self, requester: Optional[Requester], @@ -88,11 +131,7 @@ class Ratelimiter: * The reactor timestamp for when the action can be performed next. -1 if rate_hz is less than or equal to zero """ - if key is None: - if not requester: - raise ValueError("Must supply at least one of `requester` or `key`") - - key = requester.user.to_string() + key = self._get_key(requester, key) if requester: # Disable rate limiting of users belonging to any AS that is configured @@ -121,7 +160,7 @@ class Ratelimiter: self._prune_message_counts(time_now_s) # Check if there is an existing count entry for this key - action_count, time_start, _ = self.actions.get(key, (0.0, time_now_s, 0.0)) + action_count, time_start, _ = self._get_action_counts(key, time_now_s) # Check whether performing another action is allowed time_delta = time_now_s - time_start @@ -164,6 +203,37 @@ class Ratelimiter: return allowed, time_allowed + def record_action( + self, + requester: Optional[Requester], + key: Optional[Hashable] = None, + n_actions: int = 1, + _time_now_s: Optional[float] = None, + ) -> None: + """Record that an action(s) took place, even if they violate the rate limit. + + This is useful for tracking the frequency of events that happen across + federation which we still want to impose local rate limits on. For instance, if + we are alice.com monitoring a particular room, we cannot prevent bob.com + from joining users to that room. However, we can track the number of recent + joins in the room and refuse to serve new joins ourselves if there have been too + many in the room across both homeservers. + + Args: + requester: The requester that is doing the action, if any. + key: An arbitrary key used to classify an action. Defaults to the + requester's user ID. + n_actions: The number of times the user wants to do this action. If the user + cannot do all of the actions, the user's action count is not incremented + at all. + _time_now_s: The current time. Optional, defaults to the current time according + to self.clock. Only used by tests. + """ + key = self._get_key(requester, key) + time_now_s = _time_now_s if _time_now_s is not None else self.clock.time() + action_count, time_start, rate_hz = self._get_action_counts(key, time_now_s) + self.actions[key] = (action_count + n_actions, time_start, rate_hz) + def _prune_message_counts(self, time_now_s: float) -> None: """Remove message count entries that have not exceeded their defined rate_hz limit diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 3f85d61b46..00e81b3afc 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -84,6 +84,8 @@ class RoomVersion: # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of # knocks and restricted join rules into the same join condition. msc3787_knock_restricted_join_rule: bool + # MSC3667: Enforce integer power levels + msc3667_int_only_power_levels: bool class RoomVersions: @@ -103,6 +105,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V2 = RoomVersion( "2", @@ -120,6 +123,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V3 = RoomVersion( "3", @@ -137,6 +141,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V4 = RoomVersion( "4", @@ -154,6 +159,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V5 = RoomVersion( "5", @@ -171,6 +177,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V6 = RoomVersion( "6", @@ -188,6 +195,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -205,6 +213,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V7 = RoomVersion( "7", @@ -222,6 +231,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V8 = RoomVersion( "8", @@ -239,6 +249,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V9 = RoomVersion( "9", @@ -256,6 +267,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC2716v3 = RoomVersion( "org.matrix.msc2716v3", @@ -273,6 +285,7 @@ class RoomVersions: msc2716_historical=True, msc2716_redactions=True, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC3787 = RoomVersion( "org.matrix.msc3787", @@ -290,6 +303,25 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=True, + msc3667_int_only_power_levels=False, + ) + V10 = RoomVersion( + "10", + RoomDisposition.STABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=True, + msc3375_redaction_rules=True, + msc2403_knocking=True, + msc2716_historical=False, + msc2716_redactions=False, + msc3787_knock_restricted_join_rule=True, + msc3667_int_only_power_levels=True, ) @@ -308,6 +340,7 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = { RoomVersions.V9, RoomVersions.MSC2716v3, RoomVersions.MSC3787, + RoomVersions.V10, ) } diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 363ac98ea9..923891ae0d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -106,7 +106,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs) def start_worker_reactor( appname: str, config: HomeServerConfig, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process @@ -141,7 +143,9 @@ def start_reactor( daemonize: bool, print_pidfile: bool, logger: logging.Logger, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 561621a285..8a583d3ec6 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -28,18 +28,22 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.events import EventBase from synapse.handlers.admin import ExfiltrationWriter -from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore -from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore -from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.filtering import SlavedFilteringStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore -from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer +from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.databases.main.account_data import AccountDataWorkerStore +from synapse.storage.databases.main.appservice import ( + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, +) +from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.registration import RegistrationWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore +from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.types import StateMap from synapse.util import SYNAPSE_VERSION from synapse.util.logcontext import LoggingContext @@ -48,19 +52,30 @@ logger = logging.getLogger("synapse.app.admin_cmd") class AdminCmdSlavedStore( - SlavedReceiptsStore, - SlavedAccountDataStore, - SlavedApplicationServiceStore, - SlavedRegistrationStore, SlavedFilteringStore, - SlavedDeviceInboxStore, SlavedDeviceStore, SlavedPushRuleStore, SlavedEventStore, - BaseSlavedStore, + TagsWorkerStore, + DeviceInboxWorkerStore, + AccountDataWorkerStore, + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, + RegistrationWorkerStore, + ReceiptsWorkerStore, RoomWorkerStore, ): - pass + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + # Annoyingly `filter_events_for_client` assumes that this exists. We + # should refactor it to take a `Clock` directly. + self.clock = hs.get_clock() class AdminCmdServer(HomeServer): diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py new file mode 100644 index 0000000000..89eb07df27 --- /dev/null +++ b/synapse/app/complement_fork_starter.py @@ -0,0 +1,190 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ## What this script does +# +# This script spawns multiple workers, whilst only going through the code loading +# process once. The net effect is that start-up time for a swarm of workers is +# reduced, particularly in CPU-constrained environments. +# +# Before the workers are spawned, the database is prepared in order to avoid the +# workers racing. +# +# ## Stability +# +# This script is only intended for use within the Synapse images for the +# Complement test suite. +# There are currently no stability guarantees whatsoever; especially not about: +# - whether it will continue to exist in future versions; +# - the format of its command-line arguments; or +# - any details about its behaviour or principles of operation. +# +# ## Usage +# +# The first argument should be the path to the database configuration, used to +# set up the database. The rest of the arguments are used as follows: +# Each worker is specified as an argument group (each argument group is +# separated by '--'). +# The first argument in each argument group is the Python module name of the application +# to start. Further arguments are then passed to that module as-is. +# +# ## Example +# +# python -m synapse.app.complement_fork_starter path_to_db_config.yaml \ +# synapse.app.homeserver [args..] -- \ +# synapse.app.generic_worker [args..] -- \ +# ... +# synapse.app.generic_worker [args..] +# +import argparse +import importlib +import itertools +import multiprocessing +import sys +from typing import Any, Callable, List + +from twisted.internet.main import installReactor + + +class ProxiedReactor: + """ + Twisted tracks the 'installed' reactor as a global variable. + (Actually, it does some module trickery, but the effect is similar.) + + The default EpollReactor is buggy if it's created before a process is + forked, then used in the child. + See https://twistedmatrix.com/trac/ticket/4759#comment:17. + + However, importing certain Twisted modules will automatically create and + install a reactor if one hasn't already been installed. + It's not normally possible to re-install a reactor. + + Given the goal of launching workers with fork() to only import the code once, + this presents a conflict. + Our work around is to 'install' this ProxiedReactor which prevents Twisted + from creating and installing one, but which lets us replace the actual reactor + in use later on. + """ + + def __init__(self) -> None: + self.___reactor_target: Any = None + + def _install_real_reactor(self, new_reactor: Any) -> None: + """ + Install a real reactor for this ProxiedReactor to forward lookups onto. + + This method is specific to our ProxiedReactor and should not clash with + any names used on an actual Twisted reactor. + """ + self.___reactor_target = new_reactor + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self.___reactor_target, attr_name) + + +def _worker_entrypoint( + func: Callable[[], None], proxy_reactor: ProxiedReactor, args: List[str] +) -> None: + """ + Entrypoint for a forked worker process. + + We just need to set up the command-line arguments, create our real reactor + and then kick off the worker's main() function. + """ + + sys.argv = args + + from twisted.internet.epollreactor import EPollReactor + + proxy_reactor._install_real_reactor(EPollReactor()) + func() + + +def main() -> None: + """ + Entrypoint for the forking launcher. + """ + parser = argparse.ArgumentParser() + parser.add_argument("db_config", help="Path to database config file") + parser.add_argument( + "args", + nargs="...", + help="Argument groups separated by `--`. " + "The first argument of each group is a Synapse app name. " + "Subsequent arguments are passed through.", + ) + ns = parser.parse_args() + + # Split up the subsequent arguments into each workers' arguments; + # `--` is our delimiter of choice. + args_by_worker: List[List[str]] = [ + list(args) + for cond, args in itertools.groupby(ns.args, lambda ele: ele != "--") + if cond and args + ] + + # Prevent Twisted from installing a shared reactor that all the workers will + # inherit when we fork(), by installing our own beforehand. + proxy_reactor = ProxiedReactor() + installReactor(proxy_reactor) + + # Import the entrypoints for all the workers. + worker_functions = [] + for worker_args in args_by_worker: + worker_module = importlib.import_module(worker_args[0]) + worker_functions.append(worker_module.main) + + # We need to prepare the database first as otherwise all the workers will + # try to create a schema version table and some will crash out. + from synapse._scripts import update_synapse_database + + update_proc = multiprocessing.Process( + target=_worker_entrypoint, + args=( + update_synapse_database.main, + proxy_reactor, + [ + "update_synapse_database", + "--database-config", + ns.db_config, + "--run-background-updates", + ], + ), + ) + print("===== PREPARING DATABASE =====", file=sys.stderr) + update_proc.start() + update_proc.join() + print("===== PREPARED DATABASE =====", file=sys.stderr) + + # At this point, we've imported all the main entrypoints for all the workers. + # Now we basically just fork() out to create the workers we need. + # Because we're using fork(), all the workers get a clone of this launcher's + # memory space and don't need to repeat the work of loading the code! + # Instead of using fork() directly, we use the multiprocessing library, + # which uses fork() on Unix platforms. + processes = [] + for (func, worker_args) in zip(worker_functions, args_by_worker): + process = multiprocessing.Process( + target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) + ) + process.start() + processes.append(process) + + # Be a good parent and wait for our children to die before exiting. + for process in processes: + process.join() + + +if __name__ == "__main__": + main() diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 4a987fb759..42d1f6d219 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -48,20 +48,12 @@ from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore -from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore -from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore -from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.filtering import SlavedFilteringStore from synapse.replication.slave.storage.keys import SlavedKeyStore -from synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.pushers import SlavedPusherStore -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore -from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.rest.admin import register_servlets_for_media_repo from synapse.rest.client import ( account_data, @@ -100,8 +92,15 @@ from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.synapse.client import build_synapse_client_resource_tree from synapse.rest.well_known import well_known_resource from synapse.server import HomeServer +from synapse.storage.databases.main.account_data import AccountDataWorkerStore +from synapse.storage.databases.main.appservice import ( + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, +) from synapse.storage.databases.main.censor_events import CensorEventsStore from synapse.storage.databases.main.client_ips import ClientIpWorkerStore +from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore +from synapse.storage.databases.main.directory import DirectoryWorkerStore from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyStore from synapse.storage.databases.main.lock import LockStore from synapse.storage.databases.main.media_repository import MediaRepositoryStore @@ -110,11 +109,15 @@ from synapse.storage.databases.main.monthly_active_users import ( MonthlyActiveUsersWorkerStore, ) from synapse.storage.databases.main.presence import PresenceStore +from synapse.storage.databases.main.profile import ProfileWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.registration import RegistrationWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.storage.databases.main.room_batch import RoomBatchStore from synapse.storage.databases.main.search import SearchStore from synapse.storage.databases.main.session import SessionStore from synapse.storage.databases.main.stats import StatsStore +from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.transactions import TransactionWorkerStore from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore from synapse.storage.databases.main.user_directory import UserDirectoryStore @@ -227,11 +230,11 @@ class GenericWorkerSlavedStore( UIAuthWorkerStore, EndToEndRoomKeyStore, PresenceStore, - SlavedDeviceInboxStore, + DeviceInboxWorkerStore, SlavedDeviceStore, - SlavedReceiptsStore, SlavedPushRuleStore, - SlavedAccountDataStore, + TagsWorkerStore, + AccountDataWorkerStore, SlavedPusherStore, CensorEventsStore, ClientIpWorkerStore, @@ -239,19 +242,20 @@ class GenericWorkerSlavedStore( SlavedKeyStore, RoomWorkerStore, RoomBatchStore, - DirectoryStore, - SlavedApplicationServiceStore, - SlavedRegistrationStore, - SlavedProfileStore, + DirectoryWorkerStore, + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, + ProfileWorkerStore, SlavedFilteringStore, MonthlyActiveUsersWorkerStore, MediaRepositoryStore, ServerMetricsStore, + ReceiptsWorkerStore, + RegistrationWorkerStore, SearchStore, TransactionWorkerStore, LockStore, SessionStore, - BaseSlavedStore, ): # Properties that multiple storage classes define. Tell mypy what the # expected type is. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 745e704141..6bafa7d3f3 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -44,7 +44,6 @@ from synapse.app._base import ( register_start, ) from synapse.config._base import ConfigError, format_config_error -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ListenerConfig from synapse.federation.transport.server import TransportLayerServer @@ -202,7 +201,7 @@ class SynapseHomeServer(HomeServer): } ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: from synapse.rest.synapse.client.password_reset import ( PasswordResetSubmitTokenResource, ) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index df1c214462..0963fb3bb4 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -53,6 +53,18 @@ sent_events_counter = Counter( "synapse_appservice_api_sent_events", "Number of events sent to the AS", ["service"] ) +sent_ephemeral_counter = Counter( + "synapse_appservice_api_sent_ephemeral", + "Number of ephemeral events sent to the AS", + ["service"], +) + +sent_todevice_counter = Counter( + "synapse_appservice_api_sent_todevice", + "Number of todevice messages sent to the AS", + ["service"], +) + HOUR_IN_MS = 60 * 60 * 1000 @@ -310,6 +322,8 @@ class ApplicationServiceApi(SimpleHttpClient): ) sent_transactions_counter.labels(service.id).inc() sent_events_counter.labels(service.id).inc(len(serialized_events)) + sent_ephemeral_counter.labels(service.id).inc(len(ephemeral)) + sent_todevice_counter.labels(service.id).inc(len(to_device_messages)) return True except CodeMessageException as e: logger.warning( diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index de5e5216c2..430ffbcd1f 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -319,7 +319,9 @@ class _ServiceQueuer: rooms_of_interesting_users.update(event.room_id for event in events) # EDUs rooms_of_interesting_users.update( - ephemeral["room_id"] for ephemeral in ephemerals + ephemeral["room_id"] + for ephemeral in ephemerals + if ephemeral.get("room_id") is not None ) # Look up the AS users in those rooms @@ -329,8 +331,9 @@ class _ServiceQueuer: ) # Add recipients of to-device messages. - # device_message["user_id"] is the ID of the recipient. - users.update(device_message["user_id"] for device_message in to_device_messages) + users.update( + device_message["to_user_id"] for device_message in to_device_messages + ) # Compute and return the counts / fallback key usage states otk_counts = await self._store.count_bulk_e2e_one_time_keys_for_as(users) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 095eca16c5..7c9cf403ef 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -97,16 +97,16 @@ def format_config_error(e: ConfigError) -> Iterator[str]: # We split these messages out to allow packages to override with package # specific instructions. MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\ -Please opt in or out of reporting anonymized homeserver usage statistics, by -setting the `report_stats` key in your config file to either True or False. +Please opt in or out of reporting homeserver usage statistics, by setting +the `report_stats` key in your config file to either True or False. """ MISSING_REPORT_STATS_SPIEL = """\ We would really appreciate it if you could help our project out by reporting -anonymized usage statistics from your homeserver. Only very basic aggregate -data (e.g. number of users) will be reported, but it helps us to track the -growth of the Matrix community, and helps us to make Matrix a success, as well -as to convince other networks that they should peer with us. +homeserver usage statistics from your homeserver. Your homeserver's server name, +along with very basic aggregate data (e.g. number of users) will be reported. But +it helps us to track the growth of the Matrix community, and helps us to make Matrix +a success, as well as to convince other networks that they should peer with us. Thank you. """ @@ -621,7 +621,7 @@ class RootConfig: generate_group.add_argument( "--report-stats", action="store", - help="Whether the generated config reports anonymized usage statistics.", + help="Whether the generated config reports homeserver usage statistics.", choices=["yes", "no"], ) generate_group.add_argument( diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d0b491ea6c..2db8cfb005 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -21,7 +21,7 @@ from typing import Any, Callable, Dict, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -159,12 +159,7 @@ class CacheConfig(Config): self.track_memory_usage = cache_config.get("track_memory_usage", False) if self.track_memory_usage: - try: - check_requirements("cache_memory") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("cache_memory") expire_caches = cache_config.get("expire_caches", True) cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") @@ -206,7 +201,7 @@ class CacheConfig(Config): self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl) self.sync_response_cache_duration = self.parse_duration( - cache_config.get("sync_response_cache_duration", 0) + cache_config.get("sync_response_cache_duration", "2m") ) def resize_all_caches(self) -> None: diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index c82f3ee7a3..3ead80d985 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -18,7 +18,6 @@ import email.utils import logging import os -from enum import Enum from typing import Any import attr @@ -131,41 +130,22 @@ class EmailConfig(Config): self.email_enable_notifs = email_config.get("enable_notifs", False) - self.threepid_behaviour_email = ( - # Have Synapse handle the email sending if account_threepid_delegates.email - # is not defined - # msisdn is currently always remote while Synapse does not support any method of - # sending SMS messages - ThreepidBehaviour.REMOTE - if self.root.registration.account_threepid_delegate_email - else ThreepidBehaviour.LOCAL - ) - if config.get("trust_identity_server_for_password_resets"): raise ConfigError( 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " - "details and update your config file." + "is no longer supported. Please remove it from the config file." ) - self.local_threepid_handling_disabled_due_to_email_config = False - if ( - self.threepid_behaviour_email == ThreepidBehaviour.LOCAL - and email_config == {} - ): - # We cannot warn the user this has happened here - # Instead do so when a user attempts to reset their password - self.local_threepid_handling_disabled_due_to_email_config = True - - self.threepid_behaviour_email = ThreepidBehaviour.OFF + # If we have email config settings, assume that we can verify ownership of + # email addresses. + self.can_verify_email = email_config != {} # Get lifetime of a validation token in milliseconds self.email_validation_token_lifetime = self.parse_duration( email_config.get("validation_token_lifetime", "1h") ) - if self.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.can_verify_email: missing = [] if not self.email_notif_from: missing.append("email.notif_from") @@ -356,18 +336,3 @@ class EmailConfig(Config): "Config option email.invite_client_location must be a http or https URL", path=("email", "invite_client_location"), ) - - -class ThreepidBehaviour(Enum): - """ - Enum to define the behaviour of Synapse with regards to when it contacts an identity - server for 3pid registration and password resets - - REMOTE = use an external server to send tokens - LOCAL = send tokens ourselves - OFF = disable registration via 3pid and password resets - """ - - REMOTE = "remote" - LOCAL = "local" - OFF = "off" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 0a285dba31..ee443cea00 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -87,3 +87,6 @@ class ExperimentalConfig(Config): # MSC3715: dir param on /relations. self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) + + # MSC3827: Filtering of /publicRooms by room type + self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 7e3c764b2c..a973bb5080 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -15,14 +15,9 @@ from typing import Any from synapse.types import JsonDict +from synapse.util.check_dependencies import check_requirements -from ._base import Config, ConfigError - -MISSING_JWT = """Missing jwt library. This is required for jwt login. - - Install by running: - pip install pyjwt - """ +from ._base import Config class JWTConfig(Config): @@ -41,13 +36,7 @@ class JWTConfig(Config): # that the claims exist on the JWT. self.jwt_issuer = jwt_config.get("issuer") self.jwt_audiences = jwt_config.get("audiences") - - try: - import jwt - - jwt # To stop unused lint. - except ImportError: - raise ConfigError(MISSING_JWT) + check_requirements("jwt") else: self.jwt_enabled = False self.jwt_secret = None diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index d636507886..3b42be5b5b 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -18,7 +18,7 @@ from typing import Any, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -57,12 +57,7 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: - try: - check_requirements("sentry") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("sentry") self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index 98e8cd8b5a..5418a332da 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -24,7 +24,7 @@ from synapse.types import JsonDict from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_mxc_uri -from ..util.check_dependencies import DependencyException, check_requirements +from ..util.check_dependencies import check_requirements from ._base import Config, ConfigError, read_file DEFAULT_USER_MAPPING_PROVIDER = "synapse.handlers.oidc.JinjaOidcMappingProvider" @@ -41,12 +41,7 @@ class OIDCConfig(Config): if not self.oidc_providers: return - try: - check_requirements("oidc") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) from e + check_requirements("oidc") # check we don't have any duplicate idp_ids now. (The SSO handler will also # check for duplicates when the REST listeners get registered, but that happens @@ -146,7 +141,6 @@ OIDC_PROVIDER_CONFIG_WITH_ID_SCHEMA = { "allOf": [OIDC_PROVIDER_CONFIG_SCHEMA, {"required": ["idp_id", "idp_name"]}] } - # the `oidc_providers` list can either be None (as it is in the default config), or # a list of provider configs, each of which requires an explicit ID and name. OIDC_PROVIDER_LIST_SCHEMA = { diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index d4090a1f9a..5a91917b4a 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -112,6 +112,13 @@ class RatelimitConfig(Config): defaults={"per_second": 0.01, "burst_count": 10}, ) + # Track the rate of joins to a given room. If there are too many, temporarily + # prevent local joins and remote joins via this server. + self.rc_joins_per_room = RateLimitConfig( + config.get("rc_joins_per_room", {}), + defaults={"per_second": 1, "burst_count": 10}, + ) + # Ratelimit cross-user key requests: # * For local requests this is keyed by the sending device. # * For requests received over federation this is keyed by the origin. @@ -136,6 +143,11 @@ class RatelimitConfig(Config): defaults={"per_second": 0.003, "burst_count": 5}, ) + self.rc_invites_per_issuer = RateLimitConfig( + config.get("rc_invites", {}).get("per_issuer", {}), + defaults={"per_second": 0.3, "burst_count": 10}, + ) + self.rc_third_party_invite = RateLimitConfig( config.get("rc_third_party_invite", {}), defaults={ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index fcf99be092..685a0423c5 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -20,6 +20,13 @@ from synapse.config._base import Config, ConfigError from synapse.types import JsonDict, RoomAlias, UserID from synapse.util.stringutils import random_string_with_symbols, strtobool +NO_EMAIL_DELEGATE_ERROR = """\ +Delegation of email verification to an identity server is no longer supported. To +continue to allow users to add email addresses to their accounts, and use them for +password resets, configure Synapse with an SMTP server via the `email` setting, and +remove `account_threepid_delegates.email`. +""" + class RegistrationConfig(Config): section = "registration" @@ -51,7 +58,9 @@ class RegistrationConfig(Config): self.bcrypt_rounds = config.get("bcrypt_rounds", 12) account_threepid_delegates = config.get("account_threepid_delegates") or {} - self.account_threepid_delegate_email = account_threepid_delegates.get("email") + if "email" in account_threepid_delegates: + raise ConfigError(NO_EMAIL_DELEGATE_ERROR) + # self.account_threepid_delegate_email = account_threepid_delegates.get("email") self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index aadec1e54e..1033496bb4 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -21,7 +21,7 @@ import attr from synapse.config.server import generate_ip_set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -42,6 +42,18 @@ THUMBNAIL_SIZE_YAML = """\ # method: %(method)s """ +# A map from the given media type to the type of thumbnail we should generate +# for it. +THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP = { + "image/jpeg": "jpeg", + "image/jpg": "jpeg", + "image/webp": "jpeg", + # Thumbnails can only be jpeg or png. We choose png thumbnails for gif + # because it can have transparency. + "image/gif": "png", + "image/png": "png", +} + HTTP_PROXY_SET_WARNING = """\ The Synapse config url_preview_ip_range_blacklist will be ignored as an HTTP(s) proxy is configured.""" @@ -79,13 +91,22 @@ def parse_thumbnail_requirements( width = size["width"] height = size["height"] method = size["method"] - jpeg_thumbnail = ThumbnailRequirement(width, height, method, "image/jpeg") - png_thumbnail = ThumbnailRequirement(width, height, method, "image/png") - requirements.setdefault("image/jpeg", []).append(jpeg_thumbnail) - requirements.setdefault("image/jpg", []).append(jpeg_thumbnail) - requirements.setdefault("image/webp", []).append(jpeg_thumbnail) - requirements.setdefault("image/gif", []).append(png_thumbnail) - requirements.setdefault("image/png", []).append(png_thumbnail) + + for format, thumbnail_format in THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.items(): + requirement = requirements.setdefault(format, []) + if thumbnail_format == "jpeg": + requirement.append( + ThumbnailRequirement(width, height, method, "image/jpeg") + ) + elif thumbnail_format == "png": + requirement.append( + ThumbnailRequirement(width, height, method, "image/png") + ) + else: + raise Exception( + "Unknown thumbnail mapping from %s to %s. This is a Synapse problem, please report!" + % (format, thumbnail_format) + ) return { media_type: tuple(thumbnails) for media_type, thumbnails in requirements.items() } @@ -184,13 +205,7 @@ class ContentRepositoryConfig(Config): ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: - try: - check_requirements("url_preview") - - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("url_preview") proxy_env = getproxies_environment() if "url_preview_ip_range_blacklist" not in config: diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index bd7c234d31..49ca663dde 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -18,7 +18,7 @@ from typing import Any, List, Set from synapse.config.sso import SsoAttributeRequirement from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module, load_python_module from ._base import Config, ConfigError @@ -76,12 +76,7 @@ class SAML2Config(Config): if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): return - try: - check_requirements("saml2") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("saml2") self.saml2_enabled = True diff --git a/synapse/config/server.py b/synapse/config/server.py index 828938e5ec..085fe22c51 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -301,6 +301,26 @@ class ServerConfig(Config): "public_baseurl cannot contain query parameters or a #-fragment" ) + self.extra_well_known_client_content = config.get( + "extra_well_known_client_content", {} + ) + + if not isinstance(self.extra_well_known_client_content, dict): + raise ConfigError( + "extra_well_known_content must be a dictionary of key-value pairs" + ) + + if "m.homeserver" in self.extra_well_known_client_content: + raise ConfigError( + "m.homeserver is not supported in extra_well_known_content, " + "use public_baseurl in base config instead." + ) + if "m.identity_server" in self.extra_well_known_client_content: + raise ConfigError( + "m.identity_server is not supported in extra_well_known_content, " + "use default_identity_server in base config instead." + ) + # Whether to enable user presence. presence_config = config.get("presence") or {} self.use_presence = presence_config.get("enabled") diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 6fbf927f11..c19270c6c5 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -15,7 +15,7 @@ from typing import Any, List, Set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -40,12 +40,7 @@ class TracerConfig(Config): if not self.opentracer_enabled: return - try: - check_requirements("opentracing") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("opentracing") # The tracer is enabled so sanitize the config diff --git a/synapse/event_auth.py b/synapse/event_auth.py index e23503c1e0..965cb265da 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -15,11 +15,12 @@ import logging import typing -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import Any, Collection, Dict, Iterable, List, Optional, Set, Tuple, Union from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json +from typing_extensions import Protocol from unpaddedbase64 import decode_base64 from synapse.api.constants import ( @@ -35,7 +36,8 @@ from synapse.api.room_versions import ( EventFormatVersions, RoomVersion, ) -from synapse.types import StateMap, UserID, get_domain_from_id +from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id if typing.TYPE_CHECKING: # conditional imports to avoid import cycle @@ -45,6 +47,17 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +class _EventSourceStore(Protocol): + async def get_events( + self, + event_ids: Collection[str], + redact_behaviour: EventRedactBehaviour, + get_prev_content: bool = False, + allow_rejected: bool = False, + ) -> Dict[str, "EventBase"]: + ... + + def validate_event_for_room_version(event: "EventBase") -> None: """Ensure that the event complies with the limits, and has the right signatures @@ -112,85 +125,127 @@ def validate_event_for_room_version(event: "EventBase") -> None: raise AuthError(403, "Event not signed by authorising server") -def check_auth_rules_for_event( +async def check_state_independent_auth_rules( + store: _EventSourceStore, event: "EventBase", - auth_events: Iterable["EventBase"], ) -> None: - """Check that an event complies with the auth rules + """Check that an event complies with auth rules that are independent of room state - Checks whether an event passes the auth rules with a given set of state events - - Assumes that we have already checked that the event is the right shape (it has - enough signatures, has a room ID, etc). In other words: - - - it's fine for use in state resolution, when we have already decided whether to - accept the event or not, and are now trying to decide whether it should make it - into the room state - - - when we're doing the initial event auth, it is only suitable in combination with - a bunch of other tests. + Runs through the first few auth rules, which are independent of room state. (Which + means that we only need to them once for each received event) Args: + store: the datastore; used to fetch the auth events for validation event: the event being checked. - auth_events: the room state to check the events against. Raises: AuthError if the checks fail """ - # We need to ensure that the auth events are actually for the same room, to - # stop people from using powers they've been granted in other rooms for - # example. - # - # Arguably we don't need to do this when we're just doing state res, as presumably - # the state res algorithm isn't silly enough to give us events from different rooms. - # Still, it's easier to do it anyway. + # Implementation of https://spec.matrix.org/v1.2/rooms/v9/#authorization-rules + + # 1. If type is m.room.create: + if event.type == EventTypes.Create: + _check_create(event) + + # 1.5 Otherwise, allow + return + + # 2. Reject if event has auth_events that: ... + auth_events = await store.get_events( + event.auth_event_ids(), + redact_behaviour=EventRedactBehaviour.as_is, + allow_rejected=True, + ) room_id = event.room_id - for auth_event in auth_events: + auth_dict: MutableStateMap[str] = {} + expected_auth_types = auth_types_for_event(event.room_version, event) + for auth_event_id in event.auth_event_ids(): + auth_event = auth_events.get(auth_event_id) + + # we should have all the auth events by now, so if we do not, that suggests + # a synapse programming error + if auth_event is None: + raise RuntimeError( + f"Event {event.event_id} has unknown auth event {auth_event_id}" + ) + + # We need to ensure that the auth events are actually for the same room, to + # stop people from using powers they've been granted in other rooms for + # example. if auth_event.room_id != room_id: raise AuthError( 403, "During auth for event %s in room %s, found event %s in the state " "which is in room %s" - % (event.event_id, room_id, auth_event.event_id, auth_event.room_id), + % (event.event_id, room_id, auth_event_id, auth_event.room_id), ) - if auth_event.rejected_reason: + + k = (auth_event.type, auth_event.state_key) + + # 2.1 ... have duplicate entries for a given type and state_key pair + if k in auth_dict: raise AuthError( 403, - "During auth for event %s: found rejected event %s in the state" - % (event.event_id, auth_event.event_id), + f"Event {event.event_id} has duplicate auth_events for {k}: {auth_dict[k]} and {auth_event_id}", ) - # Implementation of https://matrix.org/docs/spec/rooms/v1#authorization-rules - # - # 1. If type is m.room.create: - if event.type == EventTypes.Create: - # 1b. If the domain of the room_id does not match the domain of the sender, - # reject. - sender_domain = get_domain_from_id(event.sender) - room_id_domain = get_domain_from_id(event.room_id) - if room_id_domain != sender_domain: + # 2.2 ... have entries whose type and state_key don’t match those specified by + # the auth events selection algorithm described in the server + # specification. + if k not in expected_auth_types: raise AuthError( - 403, "Creation event's room_id domain does not match sender's" + 403, + f"Event {event.event_id} has unexpected auth_event for {k}: {auth_event_id}", ) - # 1c. If content.room_version is present and is not a recognised version, reject - room_version_prop = event.content.get("room_version", "1") - if room_version_prop not in KNOWN_ROOM_VERSIONS: + # We also need to check that the auth event itself is not rejected. + if auth_event.rejected_reason: raise AuthError( 403, - "room appears to have unsupported version %s" % (room_version_prop,), + "During auth for event %s: found rejected event %s in the state" + % (event.event_id, auth_event.event_id), ) - logger.debug("Allowing! %s", event) - return - - auth_dict = {(e.type, e.state_key): e for e in auth_events} + auth_dict[k] = auth_event_id # 3. If event does not have a m.room.create in its auth_events, reject. creation_event = auth_dict.get((EventTypes.Create, ""), None) if not creation_event: raise AuthError(403, "No create event in auth events") + +def check_state_dependent_auth_rules( + event: "EventBase", + auth_events: Iterable["EventBase"], +) -> None: + """Check that an event complies with auth rules that depend on room state + + Runs through the parts of the auth rules that check an event against bits of room + state. + + Note: + + - it's fine for use in state resolution, when we have already decided whether to + accept the event or not, and are now trying to decide whether it should make it + into the room state + + - when we're doing the initial event auth, it is only suitable in combination with + a bunch of other tests (including, but not limited to, check_state_independent_auth_rules). + + Args: + event: the event being checked. + auth_events: the room state to check the events against. + + Raises: + AuthError if the checks fail + """ + # there are no state-dependent auth rules for create events. + if event.type == EventTypes.Create: + logger.debug("Allowing! %s", event) + return + + auth_dict = {(e.type, e.state_key): e for e in auth_events} + # additional check for m.federate creating_domain = get_domain_from_id(event.room_id) originating_domain = get_domain_from_id(event.sender) @@ -274,6 +329,41 @@ def _check_size_limits(event: "EventBase") -> None: raise EventSizeError("event too large") +def _check_create(event: "EventBase") -> None: + """Implementation of the auth rules for m.room.create events + + Args: + event: The `m.room.create` event to be checked + + Raises: + AuthError if the event does not pass the auth rules + """ + assert event.type == EventTypes.Create + + # 1.1 If it has any previous events, reject. + if event.prev_event_ids(): + raise AuthError(403, "Create event has prev events") + + # 1.2 If the domain of the room_id does not match the domain of the sender, + # reject. + sender_domain = get_domain_from_id(event.sender) + room_id_domain = get_domain_from_id(event.room_id) + if room_id_domain != sender_domain: + raise AuthError(403, "Creation event's room_id domain does not match sender's") + + # 1.3 If content.room_version is present and is not a recognised version, reject + room_version_prop = event.content.get("room_version", "1") + if room_version_prop not in KNOWN_ROOM_VERSIONS: + raise AuthError( + 403, + "room appears to have unsupported version %s" % (room_version_prop,), + ) + + # 1.4 If content has no creator field, reject. + if EventContentFields.ROOM_CREATOR not in event.content: + raise AuthError(403, "Create event lacks a 'creator' property") + + def _can_federate(event: "EventBase", auth_events: StateMap["EventBase"]) -> bool: creation_event = auth_events.get((EventTypes.Create, "")) # There should always be a creation event, but if not don't federate. @@ -650,6 +740,32 @@ def _check_power_levels( except Exception: raise SynapseError(400, "Not a valid power level: %s" % (v,)) + # Reject events with stringy power levels if required by room version + if ( + event.type == EventTypes.PowerLevels + and room_version_obj.msc3667_int_only_power_levels + ): + for k, v in event.content.items(): + if k in { + "users_default", + "events_default", + "state_default", + "ban", + "redact", + "kick", + "invite", + }: + if not isinstance(v, int): + raise SynapseError(400, f"{v!r} must be an integer.") + if k in {"events", "notifications", "users"}: + if not isinstance(v, dict) or not all( + isinstance(v, int) for v in v.values() + ): + raise SynapseError( + 400, + f"{v!r} must be a dict wherein all the values are integers.", + ) + key = (event.type, event.state_key) current_state = auth_events.get(key) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 98c203ada0..17f624b68f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -24,9 +24,11 @@ from synapse.api.room_versions import ( RoomVersion, ) from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.event_auth import auth_types_for_event from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict from synapse.state import StateHandler from synapse.storage.databases.main import DataStore +from synapse.storage.state import StateFilter from synapse.types import EventID, JsonDict from synapse.util import Clock from synapse.util.stringutils import random_string @@ -120,8 +122,12 @@ class EventBuilder: The signed and hashed event. """ if auth_event_ids is None: - state_ids = await self._state.get_current_state_ids( - self.room_id, prev_event_ids + state_ids = await self._state.compute_state_after_events( + self.room_id, + prev_event_ids, + state_filter=StateFilter.from_types( + auth_types_for_event(self.room_version, self) + ), ) auth_event_ids = self._event_auth_handler.compute_auth_events( self, state_ids diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 32712d2042..4a3bfb38f1 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -21,7 +21,6 @@ from typing import ( Awaitable, Callable, Collection, - Dict, List, Optional, Tuple, @@ -32,10 +31,11 @@ from typing import ( from typing_extensions import Literal import synapse +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import RoomAlias, UserProfile +from synapse.types import JsonDict, RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -50,12 +50,12 @@ CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ str, - "synapse.api.errors.Codes", + Codes, # Highly experimental, not officially part of the spamchecker API, may # disappear without warning depending on the results of ongoing # experiments. # Use this to return additional information as part of an error. - Tuple["synapse.api.errors.Codes", Dict], + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -70,7 +70,12 @@ USER_MAY_JOIN_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -81,7 +86,12 @@ USER_MAY_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -92,7 +102,12 @@ USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -103,7 +118,12 @@ USER_MAY_CREATE_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -114,7 +134,12 @@ USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -125,7 +150,12 @@ USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -154,7 +184,12 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -345,7 +380,7 @@ class SpamChecker: async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[Tuple["synapse.api.errors.Codes", Dict], str]: + ) -> Union[Tuple[Codes, JsonDict], str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -376,7 +411,16 @@ class SpamChecker: elif res is True: # This spam-checker rejects the event with deprecated # return value `True` - return (synapse.api.errors.Codes.FORBIDDEN, {}) + return synapse.api.errors.Codes.FORBIDDEN, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): + return res + elif isinstance(res, synapse.api.errors.Codes): + return res, {} elif not isinstance(res, str): # mypy complains that we can't reach this code because of the # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know @@ -422,7 +466,7 @@ class SpamChecker: async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, JsonDict], Literal["NOT_SPAM"]]: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -432,7 +476,7 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - NOT_SPAM if the operation is permitted, Codes otherwise. + NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise. """ for callback in self._user_may_join_room_callbacks: with Measure( @@ -443,21 +487,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting join as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. return self.NOT_SPAM async def user_may_invite( self, inviter_userid: str, invitee_userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may send an invite Args: @@ -479,21 +530,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. return self.NOT_SPAM async def user_may_send_3pid_invite( self, inviter_userid: str, medium: str, address: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may invite a given threepid into the room Note that if the threepid is already associated with a Matrix user ID, Synapse @@ -519,20 +577,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting 3pid invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room( self, userid: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room Args: @@ -546,20 +611,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room creation as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room_alias( self, userid: str, room_alias: RoomAlias - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room alias Args: @@ -575,20 +647,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room create as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_publish_room( self, userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may publish a room to the directory Args: @@ -603,14 +682,21 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room publication as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM @@ -678,7 +764,7 @@ class SpamChecker: async def check_media_file_for_spam( self, file_wrapper: ReadableFileWrapper, file_info: FileInfo - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a piece of newly uploaded media should be blocked. This will be called for local uploads, downloads of remote media, each @@ -715,13 +801,20 @@ class SpamChecker: if res is False or res is self.NOT_SPAM: continue elif res is True: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting media file as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 35f3f3690f..72ab696898 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -464,14 +464,7 @@ class ThirdPartyEventRules: Returns: A dict mapping (event type, state key) to state event. """ - state_ids = await self._storage_controllers.state.get_current_state_ids(room_id) - room_state_events = await self.store.get_events(state_ids.values()) - - state_events = {} - for key, event_id in state_ids.items(): - state_events[key] = room_state_events[event_id] - - return state_events + return await self._storage_controllers.state.get_current_state(room_id) async def on_profile_update( self, user_id: str, new_profile: ProfileInfo, by_admin: bool, deactivation: bool diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 66e6305562..842f5327c2 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -53,7 +53,7 @@ from synapse.api.room_versions import ( RoomVersion, RoomVersions, ) -from synapse.events import EventBase, builder +from synapse.events import EventBase, builder, make_event_from_dict from synapse.federation.federation_base import ( FederationBase, InvalidEventSignatureError, @@ -217,7 +217,7 @@ class FederationClient(FederationBase): ) async def claim_client_keys( - self, destination: str, content: JsonDict, timeout: int + self, destination: str, content: JsonDict, timeout: Optional[int] ) -> JsonDict: """Claims one-time keys for a device hosted on a remote server. @@ -299,7 +299,8 @@ class FederationClient(FederationBase): moving to the next destination. None indicates no timeout. Returns: - The requested PDU, or None if we were unable to find it. + A copy of the requested PDU that is safe to modify, or None if we + were unable to find it. Raises: SynapseError, NotRetryingDestination, FederationDeniedError @@ -309,7 +310,7 @@ class FederationClient(FederationBase): ) logger.debug( - "retrieved event id %s from %s: %r", + "get_pdu_from_destination_raw: retrieved event id %s from %s: %r", event_id, destination, transaction_data, @@ -358,54 +359,92 @@ class FederationClient(FederationBase): The requested PDU, or None if we were unable to find it. """ + logger.debug( + "get_pdu: event_id=%s from destinations=%s", event_id, destinations + ) + # TODO: Rate limit the number of times we try and get the same event. - ev = self._get_pdu_cache.get(event_id) - if ev: - return ev + # We might need the same event multiple times in quick succession (before + # it gets persisted to the database), so we cache the results of the lookup. + # Note that this is separate to the regular get_event cache which caches + # events once they have been persisted. + event = self._get_pdu_cache.get(event_id) + + # If we don't see the event in the cache, go try to fetch it from the + # provided remote federated destinations + if not event: + pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) + + for destination in destinations: + now = self._clock.time_msec() + last_attempt = pdu_attempts.get(destination, 0) + if last_attempt + PDU_RETRY_TIME_MS > now: + logger.debug( + "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)", + destination, + last_attempt, + PDU_RETRY_TIME_MS, + now, + ) + continue + + try: + event = await self.get_pdu_from_destination_raw( + destination=destination, + event_id=event_id, + room_version=room_version, + timeout=timeout, + ) - pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) + pdu_attempts[destination] = now - signed_pdu = None - for destination in destinations: - now = self._clock.time_msec() - last_attempt = pdu_attempts.get(destination, 0) - if last_attempt + PDU_RETRY_TIME_MS > now: - continue + if event: + # Prime the cache + self._get_pdu_cache[event.event_id] = event - try: - signed_pdu = await self.get_pdu_from_destination_raw( - destination=destination, - event_id=event_id, - room_version=room_version, - timeout=timeout, - ) + # FIXME: We should add a `break` here to avoid calling every + # destination after we already found a PDU (will follow-up + # in a separate PR) - pdu_attempts[destination] = now - - except SynapseError as e: - logger.info( - "Failed to get PDU %s from %s because %s", event_id, destination, e - ) - continue - except NotRetryingDestination as e: - logger.info(str(e)) - continue - except FederationDeniedError as e: - logger.info(str(e)) - continue - except Exception as e: - pdu_attempts[destination] = now + except SynapseError as e: + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, + destination, + e, + ) + continue + except NotRetryingDestination as e: + logger.info(str(e)) + continue + except FederationDeniedError as e: + logger.info(str(e)) + continue + except Exception as e: + pdu_attempts[destination] = now + + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, + destination, + e, + ) + continue - logger.info( - "Failed to get PDU %s from %s because %s", event_id, destination, e - ) - continue + if not event: + return None - if signed_pdu: - self._get_pdu_cache[event_id] = signed_pdu + # `event` now refers to an object stored in `get_pdu_cache`. Our + # callers may need to modify the returned object (eg to set + # `event.internal_metadata.outlier = true`), so we return a copy + # rather than the original object. + event_copy = make_event_from_dict( + event.get_pdu_json(), + event.room_version, + ) - return signed_pdu + return event_copy async def get_room_state_ids( self, destination: str, room_id: str, event_id: str diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e1518f1f6..ae550d3f4d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -67,6 +67,7 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, ReplicationGetQueryRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_decoder, unwrapFirstError @@ -117,6 +118,7 @@ class FederationServer(FederationBase): self._federation_event_handler = hs.get_federation_event_handler() self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() + self._room_member_handler = hs.get_room_member_handler() self._state_storage_controller = hs.get_storage_controllers().state @@ -620,6 +622,15 @@ class FederationServer(FederationBase): ) raise IncompatibleRoomVersionError(room_version=room_version) + # Refuse the request if that room has seen too many joins recently. + # This is in addition to the HS-level rate limiting applied by + # BaseFederationServlet. + # type-ignore: mypy doesn't seem able to deduce the type of the limiter(!?) + await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type] + requester=None, + key=room_id, + update=False, + ) pdu = await self.handler.on_make_join_request(origin, room_id, user_id) return {"event": pdu.get_templated_pdu_json(), "room_version": room_version} @@ -654,6 +665,12 @@ class FederationServer(FederationBase): room_id: str, caller_supports_partial_state: bool = False, ) -> Dict[str, Any]: + await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type] + requester=None, + key=room_id, + update=False, + ) + event, context = await self._on_send_membership_event( origin, content, Membership.JOIN, room_id ) @@ -882,9 +899,20 @@ class FederationServer(FederationBase): logger.warning("%s", errmsg) raise SynapseError(403, errmsg, Codes.FORBIDDEN) - return await self._federation_event_handler.on_send_membership_event( - origin, event - ) + try: + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) + except PartialStateConflictError: + # The room was un-partial stated while we were persisting the event. + # Try once more, with full state this time. + logger.info( + "Room %s was un-partial stated during `on_send_membership_event`, trying again.", + room_id, + ) + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) async def on_event_auth( self, origin: str, room_id: str, event_id: str diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 99a794c042..94a65ac65f 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -351,7 +351,11 @@ class FederationSender(AbstractFederationSender): self._is_processing = True while True: last_token = await self.store.get_federation_out_pos("events") - next_token, events = await self.store.get_all_new_events_stream( + ( + next_token, + events, + event_to_received_ts, + ) = await self.store.get_all_new_events_stream( last_token, self._last_poked_id, limit=100 ) @@ -476,7 +480,7 @@ class FederationSender(AbstractFederationSender): await self._send_pdu(event, sharded_destinations) now = self.clock.time_msec() - ts = await self.store.get_received_ts(event.event_id) + ts = event_to_received_ts[event.event_id] assert ts is not None synapse.metrics.event_processing_lag_by_event.labels( "federation_sender" @@ -509,7 +513,7 @@ class FederationSender(AbstractFederationSender): if events: now = self.clock.time_msec() - ts = await self.store.get_received_ts(events[-1].event_id) + ts = event_to_received_ts[events[-1].event_id] assert ts is not None synapse.metrics.event_processing_lag.labels( diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 9e84bd677e..32074b8ca6 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -619,7 +619,7 @@ class TransportLayerClient: ) async def claim_client_keys( - self, destination: str, query_content: JsonDict, timeout: int + self, destination: str, query_content: JsonDict, timeout: Optional[int] ) -> JsonDict: """Claim one-time keys for a list of devices hosted on a remote server. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 84100a5a52..bb0f8d6b7b 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -309,7 +309,7 @@ class BaseFederationServlet: raise # update the active opentracing span with the authenticated entity - set_tag("authenticated_entity", origin) + set_tag("authenticated_entity", str(origin)) # if the origin is authenticated and whitelisted, use its span context # as the parent. diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 814553e098..203b62e015 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -104,14 +104,15 @@ class ApplicationServicesHandler: with Measure(self.clock, "notify_interested_services"): self.is_processing = True try: - limit = 100 upper_bound = -1 while upper_bound < self.current_max: + last_token = await self.store.get_appservice_last_pos() ( upper_bound, events, - ) = await self.store.get_new_events_for_appservice( - self.current_max, limit + event_to_received_ts, + ) = await self.store.get_all_new_events_stream( + last_token, self.current_max, limit=100, get_prev_content=True ) events_by_room: Dict[str, List[EventBase]] = {} @@ -150,7 +151,7 @@ class ApplicationServicesHandler: ) now = self.clock.time_msec() - ts = await self.store.get_received_ts(event.event_id) + ts = event_to_received_ts[event.event_id] assert ts is not None synapse.metrics.event_processing_lag_by_event.labels( @@ -187,7 +188,7 @@ class ApplicationServicesHandler: if events: now = self.clock.time_msec() - ts = await self.store.get_received_ts(events[-1].event_id) + ts = event_to_received_ts[events[-1].event_id] assert ts is not None synapse.metrics.event_processing_lag.labels( diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index b79c551703..1a8379854c 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -118,28 +118,33 @@ class DeviceWorkerHandler: ips = await self.store.get_last_client_ip_by_device(user_id, device_id) _update_device_from_client_ips(device, ips) - set_tag("device", device) - set_tag("ips", ips) + set_tag("device", str(device)) + set_tag("ips", str(ips)) return device - @trace - @measure_func("device.get_user_ids_changed") - async def get_user_ids_changed( - self, user_id: str, from_token: StreamToken - ) -> JsonDict: - """Get list of users that have had the devices updated, or have newly - joined a room, that `user_id` may be interested in. + async def get_device_changes_in_shared_rooms( + self, user_id: str, room_ids: Collection[str], from_token: StreamToken + ) -> Collection[str]: + """Get the set of users whose devices have changed who share a room with + the given user. """ + changed_users = await self.store.get_device_list_changes_in_rooms( + room_ids, from_token.device_list_key + ) - set_tag("user_id", user_id) - set_tag("from_token", from_token) - now_room_key = self.store.get_room_max_token() + if changed_users is not None: + # We also check if the given user has changed their device. If + # they're in no rooms then the above query won't include them. + changed = await self.store.get_users_whose_devices_changed( + from_token.device_list_key, [user_id] + ) + changed_users.update(changed) + return changed_users - room_ids = await self.store.get_rooms_for_user(user_id) + # If the DB returned None then the `from_token` is too old, so we fall + # back on looking for device updates for all users. - # First we check if any devices have changed for users that we share - # rooms with. users_who_share_room = await self.store.get_users_who_share_room_with_user( user_id ) @@ -153,6 +158,27 @@ class DeviceWorkerHandler: from_token.device_list_key, tracked_users ) + return changed + + @trace + @measure_func("device.get_user_ids_changed") + async def get_user_ids_changed( + self, user_id: str, from_token: StreamToken + ) -> JsonDict: + """Get list of users that have had the devices updated, or have newly + joined a room, that `user_id` may be interested in. + """ + + set_tag("user_id", user_id) + set_tag("from_token", str(from_token)) + now_room_key = self.store.get_room_max_token() + + room_ids = await self.store.get_rooms_for_user(user_id) + + changed = await self.get_device_changes_in_shared_rooms( + user_id, room_ids, from_token + ) + # Then work out if any users have since joined rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) @@ -237,10 +263,19 @@ class DeviceWorkerHandler: break if possibly_changed or possibly_left: - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room + possibly_joined = possibly_changed + possibly_left = possibly_changed | possibly_left + + # Double check if we still share rooms with the given user. + users_rooms = await self.store.get_rooms_for_users_with_stream_ordering( + possibly_left + ) + for changed_user_id, entries in users_rooms.items(): + if any(e.room_id in room_ids for e in entries): + possibly_left.discard(changed_user_id) + else: + possibly_joined.discard(changed_user_id) + else: possibly_joined = set() possibly_left = set() @@ -760,7 +795,7 @@ class DeviceListUpdater: """ set_tag("origin", origin) - set_tag("edu_content", edu_content) + set_tag("edu_content", str(edu_content)) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b0f16f965..09a7a4b238 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -149,7 +149,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to create this alias", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if not self.config.roomdirectory.is_alias_creation_allowed( @@ -441,7 +442,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to publish rooms to the room list", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if requester.is_guest: diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 52bb5c9c55..c938339ddd 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple import attr from canonicaljson import encode_canonical_json @@ -92,7 +92,11 @@ class E2eKeysHandler: @trace async def query_devices( - self, query_body: JsonDict, timeout: int, from_user_id: str, from_device_id: str + self, + query_body: JsonDict, + timeout: int, + from_user_id: str, + from_device_id: Optional[str], ) -> JsonDict: """Handle a device key query from a client @@ -120,9 +124,7 @@ class E2eKeysHandler: the number of in-flight queries at a time. """ async with self._query_devices_linearizer.queue((from_user_id, from_device_id)): - device_keys_query: Dict[str, Iterable[str]] = query_body.get( - "device_keys", {} - ) + device_keys_query: Dict[str, List[str]] = query_body.get("device_keys", {}) # separate users by domain. # make a map from domain to user_id to device_ids @@ -136,8 +138,8 @@ class E2eKeysHandler: else: remote_queries[user_id] = device_ids - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) # First get local devices. # A map of destination -> failure response. @@ -341,7 +343,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) return @@ -392,7 +394,7 @@ class E2eKeysHandler: @trace async def query_local_devices( - self, query: Dict[str, Optional[List[str]]] + self, query: Mapping[str, Optional[List[str]]] ) -> Dict[str, Dict[str, dict]]: """Get E2E device keys for local users @@ -403,7 +405,7 @@ class E2eKeysHandler: Returns: A map from user_id -> device_id -> device details """ - set_tag("local_query", query) + set_tag("local_query", str(query)) local_query: List[Tuple[str, Optional[str]]] = [] result_dict: Dict[str, Dict[str, dict]] = {} @@ -461,7 +463,7 @@ class E2eKeysHandler: @trace async def claim_one_time_keys( - self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: int + self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: Optional[int] ) -> JsonDict: local_query: List[Tuple[str, str, str]] = [] remote_queries: Dict[str, Dict[str, Dict[str, str]]] = {} @@ -475,8 +477,8 @@ class E2eKeysHandler: domain = get_domain_from_id(user_id) remote_queries.setdefault(domain, {})[user_id] = one_time_keys - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) results = await self.store.claim_e2e_one_time_keys(local_query) @@ -506,7 +508,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) await make_deferred_yieldable( defer.gatherResults( @@ -609,7 +611,7 @@ class E2eKeysHandler: result = await self.store.count_e2e_one_time_keys(user_id, device_id) - set_tag("one_time_key_counts", result) + set_tag("one_time_key_counts", str(result)) return {"one_time_key_counts": result} async def _upload_one_time_keys_for_user( diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 446f509bdc..28dc08c22a 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Dict, Optional, cast from typing_extensions import Literal @@ -97,7 +97,7 @@ class E2eRoomKeysHandler: user_id, version, room_id, session_id ) - log_kv(results) + log_kv(cast(JsonDict, results)) return results @trace diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index ed4149bd58..a2dd9c7efa 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -23,7 +23,10 @@ from synapse.api.constants import ( ) from synapse.api.errors import AuthError, Codes, SynapseError from synapse.api.room_versions import RoomVersion -from synapse.event_auth import check_auth_rules_for_event +from synapse.event_auth import ( + check_state_dependent_auth_rules, + check_state_independent_auth_rules, +) from synapse.events import EventBase from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext @@ -52,9 +55,10 @@ class EventAuthHandler: context: EventContext, ) -> None: """Check an event passes the auth rules at its own auth events""" + await check_state_independent_auth_rules(self._store, event) auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) - check_auth_rules_for_event(event, auth_events_by_id.values()) + check_state_dependent_auth_rules(event, auth_events_by_id.values()) def compute_auth_events( self, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 34cc5ecd11..3b5eaf5156 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( FederationDeniedError, FederationError, HttpResponseException, + LimitExceededError, NotFoundError, RequestSendFailed, SynapseError, @@ -64,6 +65,7 @@ from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import JsonDict, StateMap, get_domain_from_id @@ -549,15 +551,29 @@ class FederationHandler: # https://github.com/matrix-org/synapse/issues/12998 await self.store.store_partial_state_room(room_id, ret.servers_in_room) - max_stream_id = await self._federation_event_handler.process_remote_join( - origin, - room_id, - auth_chain, - state, - event, - room_version_obj, - partial_state=ret.partial_state, - ) + try: + max_stream_id = ( + await self._federation_event_handler.process_remote_join( + origin, + room_id, + auth_chain, + state, + event, + room_version_obj, + partial_state=ret.partial_state, + ) + ) + except PartialStateConflictError as e: + # The homeserver was already in the room and it is no longer partial + # stated. We ought to be doing a local join instead. Turn the error into + # a 429, as a hint to the client to try again. + # TODO(faster_joins): `_should_perform_remote_join` suggests that we may + # do a remote join for restricted rooms even if we have full state. + logger.error( + "Room %s was un-partial stated while processing remote join.", + room_id, + ) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) if ret.partial_state: # Kick off the process of asynchronously fetching the state for this @@ -828,7 +844,8 @@ class FederationHandler: raise SynapseError( 403, "This user is not permitted to send invites to this server/user", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) membership = event.content.get("membership") @@ -1543,14 +1560,9 @@ class FederationHandler: # all the events are updated, so we can update current state and # clear the lazy-loading flag. logger.info("Updating current state for %s", room_id) - # TODO(faster_joins): support workers + # TODO(faster_joins): notify workers in notify_room_un_partial_stated # https://github.com/matrix-org/synapse/issues/12994 - assert ( - self._storage_controllers.persistence is not None - ), "worker-mode deployments not currently supported here" - await self._storage_controllers.persistence.update_current_state( - room_id - ) + await self.state_handler.update_current_state(room_id) logger.info("Clearing partial-state flag for %s", room_id) success = await self.store.clear_partial_state_room(room_id) @@ -1567,11 +1579,6 @@ class FederationHandler: # we raced against more events arriving with partial state. Go round # the loop again. We've already logged a warning, so no need for more. - # TODO(faster_joins): there is still a race here, whereby incoming events which raced - # with us will fail to be persisted after the call to `clear_partial_state_room` due to - # having partial state. - # https://github.com/matrix-org/synapse/issues/12988 - # continue events = await self.store.get_events_as_list( diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6c9e6a00b5..16f20c8be7 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import itertools import logging from http import HTTPStatus @@ -50,19 +51,21 @@ from synapse.api.errors import ( from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions from synapse.event_auth import ( auth_types_for_event, - check_auth_rules_for_event, + check_state_dependent_auth_rules, + check_state_independent_auth_rules, validate_event_for_room_version, ) from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError -from synapse.logging.context import nested_logging_context, run_in_background +from synapse.logging.context import nested_logging_context from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) from synapse.state import StateResolutionStore +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -274,7 +277,16 @@ class FederationEventHandler: affected=pdu.event_id, ) - await self._process_received_pdu(origin, pdu, state_ids=None) + try: + await self._process_received_pdu(origin, pdu, state_ids=None) + except PartialStateConflictError: + # The room was un-partial stated while we were processing the PDU. + # Try once more, with full state this time. + logger.info( + "Room %s was un-partial stated while processing the PDU, trying again.", + room_id, + ) + await self._process_received_pdu(origin, pdu, state_ids=None) async def on_send_membership_event( self, origin: str, event: EventBase @@ -305,6 +317,9 @@ class FederationEventHandler: Raises: SynapseError if the event is not accepted into the room + PartialStateConflictError if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should + retry exactly once in this case. """ logger.debug( "on_send_membership_event: Got event: %s, signatures: %s", @@ -333,7 +348,7 @@ class FederationEventHandler: event.internal_metadata.send_on_behalf_of = origin context = await self._state_handler.compute_event_context(event) - context = await self._check_event_auth(origin, event, context) + await self._check_event_auth(origin, event, context) if context.rejected: raise SynapseError( 403, f"{event.membership} event was rejected", Codes.FORBIDDEN @@ -422,6 +437,8 @@ class FederationEventHandler: Raises: SynapseError if the response is in some way invalid. + PartialStateConflictError if the homeserver is already in the room and it + has been un-partial stated. """ create_event = None for e in state: @@ -469,7 +486,7 @@ class FederationEventHandler: partial_state=partial_state, ) - context = await self._check_event_auth(origin, event, context) + await self._check_event_auth(origin, event, context) if context.rejected: raise SynapseError(400, "Join event was rejected") @@ -749,10 +766,24 @@ class FederationEventHandler: """ logger.info("Processing pulled event %s", event) - # these should not be outliers. - assert ( - not event.internal_metadata.is_outlier() - ), "pulled event unexpectedly flagged as outlier" + # This function should not be used to persist outliers (use something + # else) because this does a bunch of operations that aren't necessary + # (extra work; in particular, it makes sure we have all the prev_events + # and resolves the state across those prev events). If you happen to run + # into a situation where the event you're trying to process/backfill is + # marked as an `outlier`, then you should update that spot to return an + # `EventBase` copy that doesn't have `outlier` flag set. + # + # `EventBase` is used to represent both an event we have not yet + # persisted, and one that we have persisted and now keep in the cache. + # In an ideal world this method would only be called with the first type + # of event, but it turns out that's not actually the case and for + # example, you could get an event from cache that is marked as an + # `outlier` (fix up that spot though). + assert not event.internal_metadata.is_outlier(), ( + "Outlier event passed to _process_pulled_event. " + "To persist an event as a non-outlier, make sure to pass in a copy without `event.internal_metadata.outlier = true`." + ) event_id = event.event_id @@ -762,7 +793,7 @@ class FederationEventHandler: if existing: if not existing.internal_metadata.is_outlier(): logger.info( - "Ignoring received event %s which we have already seen", + "_process_pulled_event: Ignoring received event %s which we have already seen", event_id, ) return @@ -1020,6 +1051,9 @@ class FederationEventHandler: # XXX: this doesn't sound right? it means that we'll end up with incomplete # state. failed_to_fetch = desired_events - event_metadata.keys() + # `event_id` could be missing from `event_metadata` because it's not necessarily + # a state event. We've already checked that we've fetched it above. + failed_to_fetch.discard(event_id) if failed_to_fetch: logger.warning( "Failed to fetch missing state events for %s %s", @@ -1083,28 +1117,27 @@ class FederationEventHandler: state_ids: Normally None, but if we are handling a gap in the graph (ie, we are missing one or more prev_events), the resolved state at the - event + event. Must not be partial state. backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) + + PartialStateConflictError: if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should retry + exactly once in this case. Will never be raised if `state_ids` is provided. """ logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier + context = await self._state_handler.compute_event_context( + event, + state_ids_before_event=state_ids, + ) try: - context = await self._state_handler.compute_event_context( - event, - state_ids_before_event=state_ids, - ) - context = await self._check_event_auth( - origin, - event, - context, - ) + await self._check_event_auth(origin, event, context) except AuthError as e: - # FIXME richvdh 2021/10/07 I don't think this is reachable. Let's log it - # for now - logger.exception("Unexpected AuthError from _check_event_auth") + # This happens only if we couldn't find the auth events. We'll already have + # logged a warning, so now we just convert to a FederationError. raise FederationError("ERROR", e.code, e.msg, affected=event.event_id) if not backfilled and not context.rejected: @@ -1296,6 +1329,53 @@ class FederationEventHandler: marker_event, ) + async def backfill_event_id( + self, destination: str, room_id: str, event_id: str + ) -> EventBase: + """Backfill a single event and persist it as a non-outlier which means + we also pull in all of the state and auth events necessary for it. + + Args: + destination: The homeserver to pull the given event_id from. + room_id: The room where the event is from. + event_id: The event ID to backfill. + + Raises: + FederationError if we are unable to find the event from the destination + """ + logger.info( + "backfill_event_id: event_id=%s from destination=%s", event_id, destination + ) + + room_version = await self._store.get_room_version(room_id) + + event_from_response = await self._federation_client.get_pdu( + [destination], + event_id, + room_version, + ) + + if not event_from_response: + raise FederationError( + "ERROR", + 404, + "Unable to find event_id=%s from destination=%s to backfill." + % (event_id, destination), + affected=event_id, + ) + + # Persist the event we just fetched, including pulling all of the state + # and auth events to de-outlier it. This also sets up the necessary + # `state_groups` for the event. + await self._process_pulled_events( + destination, + [event_from_response], + # Prevent notifications going to clients + backfilled=True, + ) + + return event_from_response + async def _get_events_and_persist( self, destination: str, room_id: str, event_ids: Collection[str] ) -> None: @@ -1430,7 +1510,9 @@ class FederationEventHandler: allow_rejected=True, ) - def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: + events_and_contexts_to_persist: List[Tuple[EventBase, EventContext]] = [] + + async def prep(event: EventBase) -> None: with nested_logging_context(suffix=event.event_id): auth = [] for auth_event_id in event.auth_event_ids(): @@ -1444,7 +1526,7 @@ class FederationEventHandler: event, auth_event_id, ) - return None + return auth.append(ae) # we're not bothering about room state, so flag the event as an outlier. @@ -1453,17 +1535,20 @@ class FederationEventHandler: context = EventContext.for_outlier(self._storage_controllers) try: validate_event_for_room_version(event) - check_auth_rules_for_event(event, auth) + await check_state_independent_auth_rules(self._store, event) + check_state_dependent_auth_rules(event, auth) except AuthError as e: logger.warning("Rejecting %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR - return event, context + events_and_contexts_to_persist.append((event, context)) + + for event in fetched_events: + await prep(event) - events_to_persist = (x for x in (prep(event) for event in fetched_events) if x) await self.persist_events_and_notify( room_id, - tuple(events_to_persist), + events_and_contexts_to_persist, # Mark these events backfilled as they're historic events that will # eventually be backfilled. For example, missing events we fetch # during backfill should be marked as backfilled as well. @@ -1471,11 +1556,8 @@ class FederationEventHandler: ) async def _check_event_auth( - self, - origin: str, - event: EventBase, - context: EventContext, - ) -> EventContext: + self, origin: str, event: EventBase, context: EventContext + ) -> None: """ Checks whether an event should be rejected (for failing auth checks). @@ -1485,9 +1567,6 @@ class FederationEventHandler: context: The event context. - Returns: - The updated context object. - Raises: AuthError if we were unable to find copies of the event's auth events. (Most other failures just cause us to set `context.rejected`.) @@ -1502,7 +1581,7 @@ class FederationEventHandler: logger.warning("While validating received event %r: %s", event, e) # TODO: use a different rejected reason here? context.rejected = RejectedReason.AUTH_ERROR - return context + return # next, check that we have all of the event's auth events. # @@ -1514,62 +1593,101 @@ class FederationEventHandler: ) # ... and check that the event passes auth at those auth events. + # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: + # 4. Passes authorization rules based on the event’s auth events, + # otherwise it is rejected. try: - check_auth_rules_for_event(event, claimed_auth_events) + await check_state_independent_auth_rules(self._store, event) + check_state_dependent_auth_rules(event, claimed_auth_events) except AuthError as e: logger.warning( "While checking auth of %r against auth_events: %s", event, e ) context.rejected = RejectedReason.AUTH_ERROR - return context + return + + # now check the auth rules pass against the room state before the event + # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: + # 5. Passes authorization rules based on the state before the event, + # otherwise it is rejected. + # + # ... however, if we only have partial state for the room, then there is a good + # chance that we'll be missing some of the state needed to auth the new event. + # So, we state-resolve the auth events that we are given against the state that + # we know about, which ensures things like bans are applied. (Note that we'll + # already have checked we have all the auth events, in + # _load_or_fetch_auth_events_for_event above) + if context.partial_state: + room_version = await self._store.get_room_version_id(event.room_id) + + local_state_id_map = await context.get_prev_state_ids() + claimed_auth_events_id_map = { + (ev.type, ev.state_key): ev.event_id for ev in claimed_auth_events + } + + state_for_auth_id_map = ( + await self._state_resolution_handler.resolve_events_with_store( + event.room_id, + room_version, + [local_state_id_map, claimed_auth_events_id_map], + event_map=None, + state_res_store=StateResolutionStore(self._store), + ) + ) + else: + event_types = event_auth.auth_types_for_event(event.room_version, event) + state_for_auth_id_map = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) - # now check auth against what we think the auth events *should* be. - event_types = event_auth.auth_types_for_event(event.room_version, event) - prev_state_ids = await context.get_prev_state_ids( - StateFilter.from_types(event_types) + calculated_auth_event_ids = self._event_auth_handler.compute_auth_events( + event, state_for_auth_id_map, for_verification=True ) - auth_events_ids = self._event_auth_handler.compute_auth_events( - event, prev_state_ids, for_verification=True + # if those are the same, we're done here. + if collections.Counter(event.auth_event_ids()) == collections.Counter( + calculated_auth_event_ids + ): + return + + # otherwise, re-run the auth checks based on what we calculated. + calculated_auth_events = await self._store.get_events_as_list( + calculated_auth_event_ids ) - auth_events_x = await self._store.get_events(auth_events_ids) + + # log the differences + + claimed_auth_event_map = {(e.type, e.state_key): e for e in claimed_auth_events} calculated_auth_event_map = { - (e.type, e.state_key): e for e in auth_events_x.values() + (e.type, e.state_key): e for e in calculated_auth_events } + logger.info( + "event's auth_events are different to our calculated auth_events. " + "Claimed but not calculated: %s. Calculated but not claimed: %s", + [ + ev + for k, ev in claimed_auth_event_map.items() + if k not in calculated_auth_event_map + or calculated_auth_event_map[k].event_id != ev.event_id + ], + [ + ev + for k, ev in calculated_auth_event_map.items() + if k not in claimed_auth_event_map + or claimed_auth_event_map[k].event_id != ev.event_id + ], + ) try: - updated_auth_events = await self._update_auth_events_for_auth( + check_state_dependent_auth_rules(event, calculated_auth_events) + except AuthError as e: + logger.warning( + "While checking auth of %r against room state before the event: %s", event, - calculated_auth_event_map=calculated_auth_event_map, - ) - except Exception: - # We don't really mind if the above fails, so lets not fail - # processing if it does. However, it really shouldn't fail so - # let's still log as an exception since we'll still want to fix - # any bugs. - logger.exception( - "Failed to double check auth events for %s with remote. " - "Ignoring failure and continuing processing of event.", - event.event_id, - ) - updated_auth_events = None - - if updated_auth_events: - context = await self._update_context_for_auth_events( - event, context, updated_auth_events + e, ) - auth_events_for_auth = updated_auth_events - else: - auth_events_for_auth = calculated_auth_event_map - - try: - check_auth_rules_for_event(event, auth_events_for_auth.values()) - except AuthError as e: - logger.warning("Failed auth resolution for %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR - return context - async def _maybe_kick_guest_users(self, event: EventBase) -> None: if event.type != EventTypes.GuestAccess: return @@ -1593,11 +1711,21 @@ class FederationEventHandler: """Checks if we should soft fail the event; if so, marks the event as such. + Does nothing for events in rooms with partial state, since we may not have an + accurate membership event for the sender in the current state. + Args: event state_ids: The state at the event if we don't have all the event's prev events origin: The host the event originates from. """ + if await self._store.is_partial_state_room(event.room_id): + # We might not know the sender's membership in the current state, so don't + # soft fail anything. Even if we do have a membership for the sender in the + # current state, it may have been derived from state resolution between + # partial and full state and may not be accurate. + return + extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) extrem_ids = set(extrem_ids_list) prev_event_ids = set(event.prev_event_ids()) @@ -1663,7 +1791,7 @@ class FederationEventHandler: ) try: - check_auth_rules_for_event(event, current_auth_events) + check_state_dependent_auth_rules(event, current_auth_events) except AuthError as e: logger.warning( "Soft-failing %r (from %s) because %s", @@ -1679,93 +1807,6 @@ class FederationEventHandler: soft_failed_event_counter.inc() event.internal_metadata.soft_failed = True - async def _update_auth_events_for_auth( - self, - event: EventBase, - calculated_auth_event_map: StateMap[EventBase], - ) -> Optional[StateMap[EventBase]]: - """Helper for _check_event_auth. See there for docs. - - Checks whether a given event has the expected auth events. If it - doesn't then we talk to the remote server to compare state to see if - we can come to a consensus (e.g. if one server missed some valid - state). - - This attempts to resolve any potential divergence of state between - servers, but is not essential and so failures should not block further - processing of the event. - - Args: - event: - - calculated_auth_event_map: - Our calculated auth_events based on the state of the room - at the event's position in the DAG. - - Returns: - updated auth event map, or None if no changes are needed. - - """ - assert not event.internal_metadata.outlier - - # check for events which are in the event's claimed auth_events, but not - # in our calculated event map. - event_auth_events = set(event.auth_event_ids()) - different_auth = event_auth_events.difference( - e.event_id for e in calculated_auth_event_map.values() - ) - - if not different_auth: - return None - - logger.info( - "auth_events refers to events which are not in our calculated auth " - "chain: %s", - different_auth, - ) - - # XXX: currently this checks for redactions but I'm not convinced that is - # necessary? - different_events = await self._store.get_events_as_list(different_auth) - - # double-check they're all in the same room - we should already have checked - # this but it doesn't hurt to check again. - for d in different_events: - assert ( - d.room_id == event.room_id - ), f"Event {event.event_id} refers to auth_event {d.event_id} which is in a different room" - - # now we state-resolve between our own idea of the auth events, and the remote's - # idea of them. - - local_state = calculated_auth_event_map.values() - remote_auth_events = dict(calculated_auth_event_map) - remote_auth_events.update({(d.type, d.state_key): d for d in different_events}) - remote_state = remote_auth_events.values() - - room_version = await self._store.get_room_version_id(event.room_id) - new_state = await self._state_handler.resolve_events( - room_version, (local_state, remote_state), event - ) - different_state = { - (d.type, d.state_key): d - for d in new_state.values() - if calculated_auth_event_map.get((d.type, d.state_key)) != d - } - if not different_state: - logger.info("State res returned no new state") - return None - - logger.info( - "After state res: updating auth_events with new state %s", - different_state.values(), - ) - - # take a copy of calculated_auth_event_map before we modify it. - auth_events = dict(calculated_auth_event_map) - auth_events.update(different_state) - return auth_events - async def _load_or_fetch_auth_events_for_event( self, destination: str, event: EventBase ) -> Collection[EventBase]: @@ -1863,61 +1904,6 @@ class FederationEventHandler: await self._auth_and_persist_outliers(room_id, remote_auth_events) - async def _update_context_for_auth_events( - self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] - ) -> EventContext: - """Update the state_ids in an event context after auth event resolution, - storing the changes as a new state group. - - Args: - event: The event we're handling the context for - - context: initial event context - - auth_events: Events to update in the event context. - - Returns: - new event context - """ - # exclude the state key of the new event from the current_state in the context. - if event.is_state(): - event_key: Optional[Tuple[str, str]] = (event.type, event.state_key) - else: - event_key = None - state_updates = { - k: a.event_id for k, a in auth_events.items() if k != event_key - } - - current_state_ids = await context.get_current_state_ids() - current_state_ids = dict(current_state_ids) # type: ignore - - current_state_ids.update(state_updates) - - prev_state_ids = await context.get_prev_state_ids() - prev_state_ids = dict(prev_state_ids) - - prev_state_ids.update({k: a.event_id for k, a in auth_events.items()}) - - # create a new state group as a delta from the existing one. - prev_group = context.state_group - state_group = await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=state_updates, - current_state_ids=current_state_ids, - ) - - return EventContext.with_state( - storage=self._storage_controllers, - state_group=state_group, - state_group_before_event=context.state_group_before_event, - state_delta_due_to_event=state_updates, - prev_group=prev_group, - delta_ids=state_updates, - partial_state=context.partial_state, - ) - async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> None: @@ -1927,6 +1913,9 @@ class FederationEventHandler: event: The event itself. context: The event context. backfilled: True if the event was backfilled. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # this method should not be called on outliers (those code paths call # persist_events_and_notify directly.) @@ -1957,9 +1946,7 @@ class FederationEventHandler: event.room_id, [(event, context)], backfilled=backfilled ) except Exception: - run_in_background( - self._store.remove_push_actions_from_staging, event.event_id - ) + await self._store.remove_push_actions_from_staging(event.event_id) raise async def persist_events_and_notify( @@ -1981,6 +1968,10 @@ class FederationEventHandler: Returns: The stream ID after which all events have been persisted. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ if not event_and_contexts: return self._store.get_room_max_stream_ordering() @@ -1989,14 +1980,19 @@ class FederationEventHandler: if instance != self._instance_name: # Limit the number of events sent over replication. We choose 200 # here as that is what we default to in `max_request_body_size(..)` - for batch in batch_iter(event_and_contexts, 200): - result = await self._send_events( - instance_name=instance, - store=self._store, - room_id=room_id, - event_and_contexts=batch, - backfilled=backfilled, - ) + try: + for batch in batch_iter(event_and_contexts, 200): + result = await self._send_events( + instance_name=instance, + store=self._store, + room_id=room_id, + event_and_contexts=batch, + backfilled=backfilled, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise return result["max_stream_id"] else: assert self._storage_controllers.persistence @@ -2058,6 +2054,10 @@ class FederationEventHandler: event, event_pos, max_stream_token, extra_users=extra_users ) + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + # TODO retrieve the previous state, and exclude join -> join transitions + self._notifier.notify_user_joined_room(event.event_id, event.room_id) + def _sanity_check_event(self, ev: EventBase) -> None: """ Do some early sanity checks of a received event diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 9bca2bc4b2..9571d461c8 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -26,7 +26,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http import RequestTimedOutError from synapse.http.client import SimpleHttpClient from synapse.http.site import SynapseRequest @@ -163,8 +162,7 @@ class IdentityHandler: sid: str, mxid: str, id_server: str, - id_access_token: Optional[str] = None, - use_v2: bool = True, + id_access_token: str, ) -> JsonDict: """Bind a 3PID to an identity server @@ -174,8 +172,7 @@ class IdentityHandler: mxid: The MXID to bind the 3PID to id_server: The domain of the identity server to query id_access_token: The access token to authenticate to the identity - server with, if necessary. Required if use_v2 is true - use_v2: Whether to use v2 Identity Service API endpoints. Defaults to True + server with Raises: SynapseError: On any of the following conditions @@ -187,24 +184,15 @@ class IdentityHandler: """ logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server) - # If an id_access_token is not supplied, force usage of v1 - if id_access_token is None: - use_v2 = False - if not valid_id_server_location(id_server): raise SynapseError( 400, "id_server must be a valid hostname with optional port and path components", ) - # Decide which API endpoint URLs to use - headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} - if use_v2: - bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - headers["Authorization"] = create_id_access_token_header(id_access_token) # type: ignore - else: - bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) + bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) + headers = {"Authorization": create_id_access_token_header(id_access_token)} try: # Use the blacklisting http client as this call is only to identity servers @@ -223,21 +211,14 @@ class IdentityHandler: return data except HttpResponseException as e: - if e.code != 404 or not use_v2: - logger.error("3PID bind failed with Matrix error: %r", e) - raise e.to_synapse_error() + logger.error("3PID bind failed with Matrix error: %r", e) + raise e.to_synapse_error() except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json_decoder.decode(e.msg) # XXX WAT? return data - logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) - res = await self.bind_threepid( - client_secret, sid, mxid, id_server, id_access_token, use_v2=False - ) - return res - async def try_unbind_threepid(self, mxid: str, threepid: dict) -> bool: """Attempt to remove a 3PID from an identity server, or if one is not provided, all identity servers we're aware the binding is present on @@ -300,8 +281,8 @@ class IdentityHandler: "id_server must be a valid hostname with optional port and path components", ) - url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server,) - url_bytes = b"/_matrix/identity/api/v1/3pid/unbind" + url = "https://%s/_matrix/identity/v2/3pid/unbind" % (id_server,) + url_bytes = b"/_matrix/identity/v2/3pid/unbind" content = { "mxid": mxid, @@ -434,48 +415,6 @@ class IdentityHandler: return session_id - async def requestEmailToken( - self, - id_server: str, - email: str, - client_secret: str, - send_attempt: int, - next_link: Optional[str] = None, - ) -> JsonDict: - """ - Request an external server send an email on our behalf for the purposes of threepid - validation. - - Args: - id_server: The identity server to proxy to - email: The email to send the message to - client_secret: The unique client_secret sends by the user - send_attempt: Which attempt this is - next_link: A link to redirect the user to once they submit the token - - Returns: - The json response body from the server - """ - params = { - "email": email, - "client_secret": client_secret, - "send_attempt": send_attempt, - } - if next_link: - params["next_link"] = next_link - - try: - data = await self.http_client.post_json_get_json( - id_server + "/_matrix/identity/api/v1/validate/email/requestToken", - params, - ) - return data - except HttpResponseException as e: - logger.info("Proxied requestToken failed: %r", e) - raise e.to_synapse_error() - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - async def requestMsisdnToken( self, id_server: str, @@ -549,18 +488,7 @@ class IdentityHandler: validation_session = None # Try to validate as email - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Remote emails will only be used if a valid identity server is provided. - assert ( - self.hs.config.registration.account_threepid_delegate_email is not None - ) - - # Ask our delegated email identity server - validation_session = await self.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: # Get a validated session matching these details validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 189f52fe5a..bd7baef051 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -37,6 +37,7 @@ from synapse.api.errors import ( AuthError, Codes, ConsentNotGivenError, + LimitExceededError, NotFoundError, ShadowBanError, SynapseError, @@ -53,6 +54,7 @@ from synapse.handlers.directory import DirectoryHandler from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -461,6 +463,7 @@ class EventCreationHandler: ) self._events_shard_config = self.config.worker.events_shard_config self._instance_name = hs.get_instance_name() + self._notifier = hs.get_notifier() self.room_prejoin_state_types = self.hs.config.api.room_prejoin_state @@ -903,6 +906,9 @@ class EventCreationHandler: await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() + if ratelimit: + await self.request_ratelimiter.ratelimit(requester, update=False) + # We limit the number of concurrent event sends in a room so that we # don't fork the DAG too much. If we don't limit then we can end up in # a situation where event persistence can't keep up, causing @@ -1247,6 +1253,8 @@ class EventCreationHandler: Raises: ShadowBanError if the requester has been shadow-banned. + SynapseError(503) if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] @@ -1297,24 +1305,35 @@ class EventCreationHandler: # We now persist the event (and update the cache in parallel, since we # don't want to block on it). - result, _ = await make_deferred_yieldable( - gather_results( - ( - run_in_background( - self._persist_event, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, + try: + result, _ = await make_deferred_yieldable( + gather_results( + ( + run_in_background( + self._persist_event, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ), + run_in_background( + self.cache_joined_hosts_for_event, event, context + ).addErrback( + log_failure, "cache_joined_hosts_for_event failed" + ), ), - run_in_background( - self.cache_joined_hosts_for_event, event, context - ).addErrback(log_failure, "cache_joined_hosts_for_event failed"), - ), - consumeErrors=True, + consumeErrors=True, + ) + ).addErrback(unwrapFirstError) + except PartialStateConflictError as e: + # The event context needs to be recomputed. + # Turn the error into a 429, as a hint to the client to try again. + logger.info( + "Room %s was un-partial stated while persisting client event.", + event.room_id, ) - ).addErrback(unwrapFirstError) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) return result @@ -1329,6 +1348,9 @@ class EventCreationHandler: """Actually persists the event. Should only be called by `handle_new_client_event`, and see its docstring for documentation of the arguments. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # Skip push notification actions for historical messages @@ -1345,16 +1367,21 @@ class EventCreationHandler: # If we're a worker we need to hit out to the master. writer_instance = self._events_shard_config.get_instance(event.room_id) if writer_instance != self._instance_name: - result = await self.send_event( - instance_name=writer_instance, - event_id=event.event_id, - store=self.store, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, - ) + try: + result = await self.send_event( + instance_name=writer_instance, + event_id=event.event_id, + store=self.store, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise stream_id = result["stream_id"] event_id = result["event_id"] if event_id != event.event_id: @@ -1418,7 +1445,12 @@ class EventCreationHandler: if state_entry.state_group in self._external_cache_joined_hosts_updates: return - joined_hosts = await self.store.get_joined_hosts(event.room_id, state_entry) + state = await state_entry.get_state( + self._storage_controllers.state, StateFilter.all() + ) + joined_hosts = await self.store.get_joined_hosts( + event.room_id, state, state_entry + ) # Note that the expiry times must be larger than the expiry time in # _external_cache_joined_hosts_updates. @@ -1482,6 +1514,10 @@ class EventCreationHandler: The persisted event. This may be different than the given event if it was de-duplicated (e.g. because we had already persisted an event with the same transaction ID.) + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] @@ -1515,6 +1551,16 @@ class EventCreationHandler: requester, is_admin_redaction=is_admin_redaction ) + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + ( + current_membership, + _, + ) = await self.store.get_local_current_membership_for_user_in_room( + event.state_key, event.room_id + ) + if current_membership != Membership.JOIN: + self._notifier.notify_user_joined_room(event.event_id, event.room_id) + await self._maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: @@ -1814,13 +1860,8 @@ class EventCreationHandler: # For each room we need to find a joined member we can use to send # the dummy event with. - latest_event_ids = await self.store.get_prev_events_for_room(room_id) - members = await self.state.get_current_users_in_room( - room_id, latest_event_ids=latest_event_ids - ) + members = await self.store.get_local_users_in_room(room_id) for user_id in members: - if not self.hs.is_mine_id(user_id): - continue requester = create_requester(user_id, authenticated_entity=self.server_name) try: event, context = await self.create_event( @@ -1831,7 +1872,6 @@ class EventCreationHandler: "room_id": room_id, "sender": user_id, }, - prev_event_ids=latest_event_ids, ) event.internal_metadata.proactively_send = False diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 895ea63ed3..741504ba9f 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -34,7 +34,6 @@ from typing import ( Callable, Collection, Dict, - FrozenSet, Generator, Iterable, List, @@ -42,7 +41,6 @@ from typing import ( Set, Tuple, Type, - Union, ) from prometheus_client import Counter @@ -68,7 +66,6 @@ from synapse.storage.databases.main import DataStore from synapse.streams import EventSource from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer -from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer @@ -1656,15 +1653,18 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): # doesn't return. C.f. #5503. return [], max_token - # Figure out which other users this user should receive updates for - users_interested_in = await self._get_interested_in(user, explicit_room_id) + # Figure out which other users this user should explicitly receive + # updates for + additional_users_interested_in = ( + await self.get_presence_router().get_interested_users(user.to_string()) + ) # We have a set of users that we're interested in the presence of. We want to # cross-reference that with the users that have actually changed their presence. # Check whether this user should see all user updates - if users_interested_in == PresenceRouter.ALL_USERS: + if additional_users_interested_in == PresenceRouter.ALL_USERS: # Provide presence state for all users presence_updates = await self._filter_all_presence_updates_for_user( user_id, include_offline, from_key @@ -1673,34 +1673,47 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): return presence_updates, max_token # Make mypy happy. users_interested_in should now be a set - assert not isinstance(users_interested_in, str) + assert not isinstance(additional_users_interested_in, str) + + # We always care about our own presence. + additional_users_interested_in.add(user_id) + + if explicit_room_id: + user_ids = await self.store.get_users_in_room(explicit_room_id) + additional_users_interested_in.update(user_ids) # The set of users that we're interested in and that have had a presence update. # We'll actually pull the presence updates for these users at the end. - interested_and_updated_users: Union[Set[str], FrozenSet[str]] = set() + interested_and_updated_users: Collection[str] if from_key is not None: # First get all users that have had a presence update updated_users = stream_change_cache.get_all_entities_changed(from_key) # Cross-reference users we're interested in with those that have had updates. - # Use a slightly-optimised method for processing smaller sets of updates. - if updated_users is not None and len(updated_users) < 500: - # For small deltas, it's quicker to get all changes and then - # cross-reference with the users we're interested in + if updated_users is not None: + # If we have the full list of changes for presence we can + # simply check which ones share a room with the user. get_updates_counter.labels("stream").inc() - for other_user_id in updated_users: - if other_user_id in users_interested_in: - # mypy thinks this variable could be a FrozenSet as it's possibly set - # to one in the `get_entities_changed` call below, and `add()` is not - # method on a FrozenSet. That doesn't affect us here though, as - # `interested_and_updated_users` is clearly a set() above. - interested_and_updated_users.add(other_user_id) # type: ignore + + sharing_users = await self.store.do_users_share_a_room( + user_id, updated_users + ) + + interested_and_updated_users = ( + sharing_users.union(additional_users_interested_in) + ).intersection(updated_users) + else: # Too many possible updates. Find all users we can see and check # if any of them have changed. get_updates_counter.labels("full").inc() + users_interested_in = ( + await self.store.get_users_who_share_room_with_user(user_id) + ) + users_interested_in.update(additional_users_interested_in) + interested_and_updated_users = ( stream_change_cache.get_entities_changed( users_interested_in, from_key @@ -1709,7 +1722,10 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): else: # No from_key has been specified. Return the presence for all users # this user is interested in - interested_and_updated_users = users_interested_in + interested_and_updated_users = ( + await self.store.get_users_who_share_room_with_user(user_id) + ) + interested_and_updated_users.update(additional_users_interested_in) # Retrieve the current presence state for each user users_to_state = await self.get_presence_handler().current_state_for_users( @@ -1804,62 +1820,6 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): def get_current_key(self) -> int: return self.store.get_current_presence_token() - @cached(num_args=2, cache_context=True) - async def _get_interested_in( - self, - user: UserID, - explicit_room_id: Optional[str] = None, - cache_context: Optional[_CacheContext] = None, - ) -> Union[Set[str], str]: - """Returns the set of users that the given user should see presence - updates for. - - Args: - user: The user to retrieve presence updates for. - explicit_room_id: The users that are in the room will be returned. - - Returns: - A set of user IDs to return presence updates for, or "ALL" to return all - known updates. - """ - user_id = user.to_string() - users_interested_in = set() - users_interested_in.add(user_id) # So that we receive our own presence - - # cache_context isn't likely to ever be None due to the @cached decorator, - # but we can't have a non-optional argument after the optional argument - # explicit_room_id either. Assert cache_context is not None so we can use it - # without mypy complaining. - assert cache_context - - # Check with the presence router whether we should poll additional users for - # their presence information - additional_users = await self.get_presence_router().get_interested_users( - user.to_string() - ) - if additional_users == PresenceRouter.ALL_USERS: - # If the module requested that this user see the presence updates of *all* - # users, then simply return that instead of calculating what rooms this - # user shares - return PresenceRouter.ALL_USERS - - # Add the additional users from the router - users_interested_in.update(additional_users) - - # Find the users who share a room with this user - users_who_share_room = await self.store.get_users_who_share_room_with_user( - user_id, on_invalidate=cache_context.invalidate - ) - users_interested_in.update(users_who_share_room) - - if explicit_room_id: - user_ids = await self.store.get_users_in_room( - explicit_room_id, on_invalidate=cache_context.invalidate - ) - users_interested_in.update(user_ids) - - return users_interested_in - def handle_timeouts( user_states: List[UserPresenceState], diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 6eed3826a7..d8ff5289b5 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -67,19 +67,14 @@ class ProfileHandler: target_user = UserID.from_string(user_id) if self.hs.is_mine(target_user): - try: - displayname = await self.store.get_profile_displayname( - target_user.localpart - ) - avatar_url = await self.store.get_profile_avatar_url( - target_user.localpart - ) - except StoreError as e: - if e.code == 404: - raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - raise + profileinfo = await self.store.get_profileinfo(target_user.localpart) + if profileinfo.display_name is None: + raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - return {"displayname": displayname, "avatar_url": avatar_url} + return { + "displayname": profileinfo.display_name, + "avatar_url": profileinfo.avatar_url, + } else: try: result = await self.federation.make_query( diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 75c0be8c36..55395457c3 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -440,7 +440,12 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: - raise SynapseError(403, "You are not permitted to create rooms", spam_check) + raise SynapseError( + 403, + "You are not permitted to create rooms", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) creation_content: JsonDict = { "room_version": new_room_version.identifier, @@ -731,7 +736,10 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: raise SynapseError( - 403, "You are not permitted to create rooms", spam_check + 403, + "You are not permitted to create rooms", + errcode=spam_check[0], + additional_fields=spam_check[1], ) if ratelimit: @@ -881,7 +889,11 @@ class RoomCreationHandler: # override any attempt to set room versions via the creation_content creation_content["room_version"] = room_version.identifier - last_stream_id = await self._send_events_for_new_room( + ( + last_stream_id, + last_sent_event_id, + depth, + ) = await self._send_events_for_new_room( requester, room_id, preset_config=preset_config, @@ -897,7 +909,7 @@ class RoomCreationHandler: if "name" in config: name = config["name"] ( - _, + name_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, @@ -909,12 +921,16 @@ class RoomCreationHandler: "content": {"name": name}, }, ratelimit=False, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = name_event.event_id + depth += 1 if "topic" in config: topic = config["topic"] ( - _, + topic_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, @@ -926,7 +942,11 @@ class RoomCreationHandler: "content": {"topic": topic}, }, ratelimit=False, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = topic_event.event_id + depth += 1 # we avoid dropping the lock between invites, as otherwise joins can # start coming in and making the createRoom slow. @@ -941,7 +961,7 @@ class RoomCreationHandler: for invitee in invite_list: ( - _, + member_event_id, last_stream_id, ) = await self.room_member_handler.update_membership_locked( requester, @@ -951,7 +971,11 @@ class RoomCreationHandler: ratelimit=False, content=content, new_room=True, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = member_event_id + depth += 1 for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] @@ -960,7 +984,10 @@ class RoomCreationHandler: medium = invite_3pid["medium"] # Note that do_3pid_invite can raise a ShadowBanError, but this was # handled above by emptying invite_3pid_list. - last_stream_id = await self.hs.get_room_member_handler().do_3pid_invite( + ( + member_event_id, + last_stream_id, + ) = await self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, @@ -969,7 +996,11 @@ class RoomCreationHandler: requester, txn_id=None, id_access_token=id_access_token, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = member_event_id + depth += 1 result = {"room_id": room_id} @@ -997,20 +1028,24 @@ class RoomCreationHandler: power_level_content_override: Optional[JsonDict] = None, creator_join_profile: Optional[JsonDict] = None, ratelimit: bool = True, - ) -> int: + ) -> Tuple[int, str, int]: """Sends the initial events into a new room. `power_level_content_override` doesn't apply when initial state has power level state event content. Returns: - The stream_id of the last event persisted. + A tuple containing the stream ID, event ID and depth of the last + event sent to the room. """ creator_id = creator.user.to_string() event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} + depth = 1 + last_sent_event_id: Optional[str] = None + def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} @@ -1020,19 +1055,30 @@ class RoomCreationHandler: return e async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: + nonlocal last_sent_event_id + nonlocal depth + event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) # Allow these events to be sent even if the user is shadow-banned to # allow the room creation to complete. ( - _, + sent_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False, ignore_shadow_ban=True, + # Note: we don't pass state_event_ids here because this triggers + # an additional query per event to look them up from the events table. + prev_event_ids=[last_sent_event_id] if last_sent_event_id else [], + depth=depth, ) + + last_sent_event_id = sent_event.event_id + depth += 1 + return last_stream_id try: @@ -1046,7 +1092,9 @@ class RoomCreationHandler: await send(etype=EventTypes.Create, content=creation_content) logger.debug("Sending %s in new room", EventTypes.Member) - await self.room_member_handler.update_membership( + # Room create event must exist at this point + assert last_sent_event_id is not None + member_event_id, _ = await self.room_member_handler.update_membership( creator, creator.user, room_id, @@ -1054,7 +1102,10 @@ class RoomCreationHandler: ratelimit=ratelimit, content=creator_join_profile, new_room=True, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = member_event_id # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. @@ -1146,7 +1197,7 @@ class RoomCreationHandler: content={"algorithm": RoomEncryptionAlgorithms.DEFAULT}, ) - return last_sent_stream_id + return last_sent_stream_id, last_sent_event_id, depth def _generate_room_id(self) -> str: """Generates a random room ID. @@ -1333,6 +1384,7 @@ class TimestampLookupHandler: self.store = hs.get_datastores().main self.state_handler = hs.get_state_handler() self.federation_client = hs.get_federation_client() + self.federation_event_handler = hs.get_federation_event_handler() self._storage_controllers = hs.get_storage_controllers() async def get_event_for_timestamp( @@ -1375,6 +1427,7 @@ class TimestampLookupHandler: # the timestamp given and the event we were able to find locally is_event_next_to_backward_gap = False is_event_next_to_forward_gap = False + local_event = None if local_event_id: local_event = await self.store.get_event( local_event_id, allow_none=False, allow_rejected=False @@ -1427,41 +1480,74 @@ class TimestampLookupHandler: remote_response, ) - # TODO: Do we want to persist this as an extremity? - # TODO: I think ideally, we would try to backfill from - # this event and run this whole - # `get_event_for_timestamp` function again to make sure - # they didn't give us an event from their gappy history. remote_event_id = remote_response.event_id - origin_server_ts = remote_response.origin_server_ts + remote_origin_server_ts = remote_response.origin_server_ts + + # Backfill this event so we can get a pagination token for + # it with `/context` and paginate `/messages` from this + # point. + # + # TODO: The requested timestamp may lie in a part of the + # event graph that the remote server *also* didn't have, + # in which case they will have returned another event + # which may be nowhere near the requested timestamp. In + # the future, we may need to reconcile that gap and ask + # other homeservers, and/or extend `/timestamp_to_event` + # to return events on *both* sides of the timestamp to + # help reconcile the gap faster. + remote_event = ( + await self.federation_event_handler.backfill_event_id( + domain, room_id, remote_event_id + ) + ) + + # XXX: When we see that the remote server is not trustworthy, + # maybe we should not ask them first in the future. + if remote_origin_server_ts != remote_event.origin_server_ts: + logger.info( + "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.", + domain, + remote_event_id, + remote_origin_server_ts, + remote_event.origin_server_ts, + ) # Only return the remote event if it's closer than the local event if not local_event or ( - abs(origin_server_ts - timestamp) + abs(remote_event.origin_server_ts - timestamp) < abs(local_event.origin_server_ts - timestamp) ): - return remote_event_id, origin_server_ts + logger.info( + "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)", + remote_event_id, + remote_event.origin_server_ts, + timestamp, + local_event.event_id if local_event else None, + local_event.origin_server_ts if local_event else None, + ) + return remote_event_id, remote_origin_server_ts except (HttpResponseException, InvalidResponseError) as ex: # Let's not put a high priority on some other homeserver # failing to respond or giving a random response logger.debug( - "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", domain, type(ex).__name__, ex, ex.args, ) - except Exception as ex: + except Exception: # But we do want to see some exceptions in our code logger.warning( - "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception", domain, - type(ex).__name__, - ex, - ex.args, + exc_info=True, ) - if not local_event_id: + # To appease mypy, we have to add both of these conditions to check for + # `None`. We only expect `local_event` to be `None` when + # `local_event_id` is `None` but mypy isn't as smart and assuming as us. + if not local_event_id or not local_event: raise SynapseError( 404, "Unable to find event from %s in direction %s" % (timestamp, direction), diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 183d4ae3c4..29868eb743 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -25,6 +25,7 @@ from synapse.api.constants import ( GuestAccess, HistoryVisibility, JoinRules, + PublicRoomsFilterFields, ) from synapse.api.errors import ( Codes, @@ -181,6 +182,7 @@ class RoomListHandler: == HistoryVisibility.WORLD_READABLE, "guest_can_join": room["guest_access"] == "can_join", "join_rule": room["join_rules"], + "org.matrix.msc3827.room_type": room["room_type"], } # Filter out Nones – rather omit the field altogether @@ -239,7 +241,9 @@ class RoomListHandler: response["chunk"] = results response["total_room_count_estimate"] = await self.store.count_public_rooms( - network_tuple, ignore_non_federatable=from_federation + network_tuple, + ignore_non_federatable=from_federation, + search_filter=search_filter, ) return response @@ -508,8 +512,21 @@ class RoomListNextBatch: def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool: - if search_filter and search_filter.get("generic_search_term", None): - generic_search_term = search_filter["generic_search_term"].upper() + """Determines whether the given search filter matches a room entry returned over + federation. + + Only used if the remote server does not support MSC2197 remote-filtered search, and + hence does not support MSC3827 filtering of `/publicRooms` by room type either. + + In this case, we cannot apply the `room_type` filter since no `room_type` field is + returned. + """ + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + generic_search_term = search_filter[ + PublicRoomsFilterFields.GENERIC_SEARCH_TERM + ].upper() if generic_search_term in room_entry.get("name", "").upper(): return True elif generic_search_term in room_entry.get("topic", "").upper(): diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index e89b7441ad..30b4cb23df 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -26,13 +26,7 @@ from synapse.api.constants import ( GuestAccess, Membership, ) -from synapse.api.errors import ( - AuthError, - Codes, - LimitExceededError, - ShadowBanError, - SynapseError, -) +from synapse.api.errors import AuthError, Codes, ShadowBanError, SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase @@ -100,26 +94,57 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): rate_hz=hs.config.ratelimiting.rc_joins_local.per_second, burst_count=hs.config.ratelimiting.rc_joins_local.burst_count, ) + # Tracks joins from local users to rooms this server isn't a member of. + # I.e. joins this server makes by requesting /make_join /send_join from + # another server. self._join_rate_limiter_remote = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_joins_remote.per_second, burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count, ) + # TODO: find a better place to keep this Ratelimiter. + # It needs to be + # - written to by event persistence code + # - written to by something which can snoop on replication streams + # - read by the RoomMemberHandler to rate limit joins from local users + # - read by the FederationServer to rate limit make_joins and send_joins from + # other homeservers + # I wonder if a homeserver-wide collection of rate limiters might be cleaner? + self._join_rate_per_room_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_joins_per_room.per_second, + burst_count=hs.config.ratelimiting.rc_joins_per_room.burst_count, + ) + # Ratelimiter for invites, keyed by room (across all issuers, all + # recipients). self._invites_per_room_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_room.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_room.burst_count, ) - self._invites_per_user_limiter = Ratelimiter( + + # Ratelimiter for invites, keyed by recipient (across all rooms, all + # issuers). + self._invites_per_recipient_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_user.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count, ) + # Ratelimiter for invites, keyed by issuer (across all rooms, all + # recipients). + self._invites_per_issuer_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_invites_per_issuer.per_second, + burst_count=hs.config.ratelimiting.rc_invites_per_issuer.burst_count, + ) + self._third_party_invite_limiter = Ratelimiter( store=self.store, clock=self.clock, @@ -128,6 +153,18 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) self.request_ratelimiter = hs.get_request_ratelimiter() + hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room) + + def _on_user_joined_room(self, event_id: str, room_id: str) -> None: + """Notify the rate limiter that a room join has occurred. + + Use this to inform the RoomMemberHandler about joins that have either + - taken place on another homeserver, or + - on another worker in this homeserver. + Joins actioned by this worker should use the usual `ratelimit` method, which + checks the limit and increments the counter in one go. + """ + self._join_rate_per_room_limiter.record_action(requester=None, key=room_id) @abc.abstractmethod async def _remote_join( @@ -264,7 +301,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if room_id: await self._invites_per_room_limiter.ratelimit(requester, room_id) - await self._invites_per_user_limiter.ratelimit(requester, invitee_user_id) + await self._invites_per_recipient_limiter.ratelimit(requester, invitee_user_id) + if requester is not None: + await self._invites_per_issuer_limiter.ratelimit(requester) async def _local_membership_update( self, @@ -275,6 +314,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, txn_id: Optional[str] = None, ratelimit: bool = True, content: Optional[dict] = None, @@ -305,6 +345,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. txn_id: ratelimit: @@ -360,6 +403,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, require_consent=require_consent, outlier=outlier, historical=historical, @@ -380,16 +424,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Only rate-limit if the user actually joined the room, otherwise we'll end # up blocking profile updates. if newly_joined and ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_local.can_do_action(requester) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) - ) + await self._join_rate_limiter_local.ratelimit(requester) + await self._join_rate_per_room_limiter.ratelimit( + requester, key=room_id, update=False + ) result_event = await self.event_creation_handler.handle_new_client_event( requester, @@ -465,6 +503,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -500,6 +539,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -539,6 +581,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, ) return result @@ -561,6 +604,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -598,6 +642,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -684,7 +731,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if target_id == self._server_notices_mxid: raise SynapseError(HTTPStatus.FORBIDDEN, "Cannot invite this user") - block_invite_code = None + block_invite_result = None if ( self._server_notices_mxid is not None @@ -702,18 +749,21 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): "Blocking invite: user is not admin and non-admin " "invites disabled" ) - block_invite_code = Codes.FORBIDDEN + block_invite_result = (Codes.FORBIDDEN, {}) spam_check = await self.spam_checker.user_may_invite( requester.user.to_string(), target_id, room_id ) if spam_check != NOT_SPAM: logger.info("Blocking invite due to spam checker") - block_invite_code = spam_check + block_invite_result = spam_check - if block_invite_code is not None: + if block_invite_result is not None: raise SynapseError( - 403, "Invites have been disabled on this server", block_invite_code + 403, + "Invites have been disabled on this server", + errcode=block_invite_result[0], + additional_fields=block_invite_result[1], ) # An empty prev_events list is allowed as long as the auth_event_ids are present @@ -728,6 +778,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, content=content, require_consent=require_consent, outlier=outlier, @@ -736,14 +787,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): latest_event_ids = await self.store.get_prev_events_for_room(room_id) - current_state_ids = await self.state_handler.get_current_state_ids( - room_id, latest_event_ids=latest_event_ids + state_before_join = await self.state_handler.compute_state_after_events( + room_id, latest_event_ids ) # TODO: Refactor into dictionary of explicitly allowed transitions # between old and new state, with specific error messages for some # transitions and generic otherwise - old_state_id = current_state_ids.get((EventTypes.Member, target.to_string())) + old_state_id = state_before_join.get((EventTypes.Member, target.to_string())) if old_state_id: old_state = await self.store.get_event(old_state_id, allow_none=True) old_membership = old_state.content.get("membership") if old_state else None @@ -794,11 +845,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if action == "kick": raise AuthError(403, "The target user is not in the room") - is_host_in_room = await self._is_host_in_room(current_state_ids) + is_host_in_room = await self._is_host_in_room(state_before_join) if effective_membership_state == Membership.JOIN: if requester.is_guest: - guest_can_join = await self._can_guest_join(current_state_ids) + guest_can_join = await self._can_guest_join(state_before_join) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -827,26 +878,32 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): target.to_string(), room_id, is_invited=inviter is not None ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Not allowed to join this room", spam_check) + raise SynapseError( + 403, + "Not allowed to join this room", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( - target.to_string(), room_id, remote_room_hosts, content, is_host_in_room + target.to_string(), + room_id, + remote_room_hosts, + content, + is_host_in_room, + state_before_join, ) if remote_join: if ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_remote.can_do_action( + await self._join_rate_limiter_remote.ratelimit( requester, ) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) - ) + await self._join_rate_per_room_limiter.ratelimit( + requester, + key=room_id, + update=False, + ) inviter = await self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): @@ -854,10 +911,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.JOIN - profile = self.profile_handler - if not content_specified: - content["displayname"] = await profile.get_displayname(target) - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if not content_specified: + content["displayname"] = await profile.get_displayname(target) + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote join for %r: %s", + target, + e, + ) if requester.is_guest: content["kind"] = "guest" @@ -934,11 +998,18 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.KNOCK - profile = self.profile_handler - if "displayname" not in content: - content["displayname"] = await profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if "displayname" not in content: + content["displayname"] = await profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote knock for %r: %s", + target, + e, + ) return await self.remote_knock( remote_room_hosts, room_id, target, content @@ -953,6 +1024,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ratelimit=ratelimit, prev_event_ids=latest_event_ids, state_event_ids=state_event_ids, + depth=depth, content=content, require_consent=require_consent, outlier=outlier, @@ -965,6 +1037,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): remote_room_hosts: List[str], content: JsonDict, is_host_in_room: bool, + state_before_join: StateMap[str], ) -> Tuple[bool, List[str]]: """ Check whether the server should do a remote join (as opposed to a local @@ -984,6 +1057,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content: The content to use as the event body of the join. This may be modified. is_host_in_room: True if the host is in the room. + state_before_join: The state before the join event (i.e. the resolution of + the states after its parent events). Returns: A tuple of: @@ -1000,20 +1075,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # If the host is in the room, but not one of the authorised hosts # for restricted join rules, a remote join must be used. room_version = await self.store.get_room_version(room_id) - current_state_ids = await self._storage_controllers.state.get_current_state_ids( - room_id - ) # If restricted join rules are not being used, a local join can always # be used. if not await self.event_auth_handler.has_restricted_join_rules( - current_state_ids, room_version + state_before_join, room_version ): return False, [] # If the user is invited to the room or already joined, the join # event can always be issued locally. - prev_member_event_id = current_state_ids.get((EventTypes.Member, user_id), None) + prev_member_event_id = state_before_join.get((EventTypes.Member, user_id), None) prev_member_event = None if prev_member_event_id: prev_member_event = await self.store.get_event(prev_member_event_id) @@ -1028,10 +1100,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # # If not, generate a new list of remote hosts based on which # can issue invites. - event_map = await self.store.get_events(current_state_ids.values()) + event_map = await self.store.get_events(state_before_join.values()) current_state = { state_key: event_map[event_id] - for state_key, event_id in current_state_ids.items() + for state_key, event_id in state_before_join.items() } allowed_servers = get_servers_from_users( get_users_which_can_issue_invite(current_state) @@ -1045,7 +1117,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Ensure the member should be allowed access via membership in a room. await self.event_auth_handler.check_restricted_join_rules( - current_state_ids, room_version, user_id, prev_member_event + state_before_join, room_version, user_id, prev_member_event ) # If this is going to be a local join, additional information must @@ -1055,7 +1127,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): EventContentFields.AUTHORISING_USER ] = await self.event_auth_handler.get_user_which_could_invite( room_id, - current_state_ids, + state_before_join, ) return False, [] @@ -1308,7 +1380,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): requester: Requester, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> int: + prev_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, + ) -> Tuple[str, int]: """Invite a 3PID to a room. Args: @@ -1321,9 +1395,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): txn_id: The transaction ID this is part of, or None if this is not part of a transaction. id_access_token: The optional identity server access token. + depth: Override the depth used to order the event in the DAG. + prev_event_ids: The event IDs to use as the prev events + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: - The new stream ID. + Tuple of event ID and stream ordering position Raises: ShadowBanError if the requester has been shadow-banned. @@ -1369,7 +1447,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # We don't check the invite against the spamchecker(s) here (through # user_may_invite) because we'll do it further down the line anyway (in # update_membership_locked). - _, stream_id = await self.update_membership( + event_id, stream_id = await self.update_membership( requester, UserID.from_string(invitee), room_id, "invite", txn_id=txn_id ) else: @@ -1381,9 +1459,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id=room_id, ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Cannot send threepid invite", spam_check) + raise SynapseError( + 403, + "Cannot send threepid invite", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) - stream_id = await self._make_and_store_3pid_invite( + event, stream_id = await self._make_and_store_3pid_invite( requester, id_server, medium, @@ -1392,9 +1475,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): inviter, txn_id=txn_id, id_access_token=id_access_token, + prev_event_ids=prev_event_ids, + depth=depth, ) + event_id = event.event_id - return stream_id + return event_id, stream_id async def _make_and_store_3pid_invite( self, @@ -1406,7 +1492,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): user: UserID, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> int: + prev_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, + ) -> Tuple[EventBase, int]: room_state = await self._storage_controllers.state.get_current_state( room_id, StateFilter.from_types( @@ -1499,8 +1587,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): }, ratelimit=False, txn_id=txn_id, + prev_event_ids=prev_event_ids, + depth=depth, ) - return stream_id + return event, stream_id async def _is_host_in_room(self, current_state_ids: StateMap[str]) -> bool: # Have we just created the room, and is this about to be the very diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index f45e06eb0e..5c01482acf 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -271,6 +271,9 @@ class StatsHandler: room_state["is_federatable"] = ( event_content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event_content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6ad053f678..d42a414c90 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -240,6 +240,7 @@ class SyncHandler: self.auth_blocking = hs.get_auth_blocking() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state + self._device_handler = hs.get_device_handler() # TODO: flush cache entries on subsequent sync request. # Once we get the next /sync request (ie, one with the same access token @@ -1268,21 +1269,11 @@ class SyncHandler: ): users_that_have_changed.add(changed_user_id) else: - users_who_share_room = ( - await self.store.get_users_who_share_room_with_user(user_id) - ) - - # Always tell the user about their own devices. We check as the user - # ID is almost certainly already included (unless they're not in any - # rooms) and taking a copy of the set is relatively expensive. - if user_id not in users_who_share_room: - users_who_share_room = set(users_who_share_room) - users_who_share_room.add(user_id) - - tracked_users = users_who_share_room users_that_have_changed = ( - await self.store.get_users_whose_devices_changed( - since_token.device_list_key, tracked_users + await self._device_handler.get_device_changes_in_shared_rooms( + user_id, + sync_result_builder.joined_room_ids, + from_token=since_token, ) ) diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 05cebb5d4d..a744d68c64 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -19,7 +19,6 @@ from twisted.web.client import PartialDownloadError from synapse.api.constants import LoginType from synapse.api.errors import Codes, LoginError, SynapseError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.util import json_decoder if TYPE_CHECKING: @@ -153,7 +152,7 @@ class _BaseThreepidAuthChecker: logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) - # msisdns are currently always ThreepidBehaviour.REMOTE + # msisdns are currently always verified via the IS if medium == "msisdn": if not self.hs.config.registration.account_threepid_delegate_msisdn: raise SynapseError( @@ -164,18 +163,7 @@ class _BaseThreepidAuthChecker: threepid_creds, ) elif medium == "email": - if ( - self.hs.config.email.threepid_behaviour_email - == ThreepidBehaviour.REMOTE - ): - assert self.hs.config.registration.account_threepid_delegate_email - threepid = await identity_handler.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif ( - self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL - ): + if self.hs.config.email.can_verify_email: threepid = None row = await self.store.get_threepid_validation_session( medium, @@ -227,10 +215,7 @@ class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChec _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return self.hs.config.email.threepid_behaviour_email in ( - ThreepidBehaviour.REMOTE, - ThreepidBehaviour.LOCAL, - ) + return self.hs.config.email.can_verify_email async def check_auth(self, authdict: dict, clientip: str) -> Any: return await self._check_threepid("email", authdict) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c63d068f74..3c35b1d2c7 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -79,6 +79,7 @@ from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import AwakenableSleeper, timeout_deferred from synapse.util.metrics import Measure +from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: from synapse.server import HomeServer @@ -479,6 +480,14 @@ class MatrixFederationHttpClient: RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. """ + # Validate server name and log if it is an invalid destination, this is + # partially to help track down code paths where we haven't validated before here + try: + parse_and_validate_server_name(request.destination) + except ValueError: + logger.exception(f"Invalid destination: {request.destination}.") + raise FederationDeniedError(request.destination) + if timeout: _sec_timeout = timeout / 1000 else: diff --git a/synapse/http/server.py b/synapse/http/server.py index e3dcc3f3dd..cf2d6f904b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -928,6 +928,17 @@ def set_cors_headers(request: Request) -> None: ) +def set_corp_headers(request: Request) -> None: + """Set the CORP headers so that javascript running in a web browsers can + embed the resource returned from this request when their client requires + the `Cross-Origin-Embedder-Policy: require-corp` header. + + Args: + request: The http request to add the CORP header to. + """ + request.setHeader(b"Cross-Origin-Resource-Policy", b"cross-origin") + + def respond_with_html(request: Request, code: int, html: str) -> None: """ Wraps `respond_with_html_bytes` by first encoding HTML from a str to UTF-8 bytes. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 903ec40c86..c1aa205eed 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -84,14 +84,13 @@ the function becomes the operation name for the span. return something_usual_and_useful -Operation names can be explicitly set for a function by passing the -operation name to ``trace`` +Operation names can be explicitly set for a function by using ``trace_with_opname``: .. code-block:: python - from synapse.logging.opentracing import trace + from synapse.logging.opentracing import trace_with_opname - @trace(opname="a_better_operation_name") + @trace_with_opname("a_better_operation_name") def interesting_badly_named_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful @@ -164,6 +163,7 @@ Gotchas with an active span? """ import contextlib +import enum import inspect import logging import re @@ -182,6 +182,8 @@ from typing import ( Type, TypeVar, Union, + cast, + overload, ) import attr @@ -268,7 +270,7 @@ try: _reporter: Reporter = attr.Factory(Reporter) - def set_process(self, *args, **kwargs): + def set_process(self, *args: Any, **kwargs: Any) -> None: return self._reporter.set_process(*args, **kwargs) def report_span(self, span: "opentracing.Span") -> None: @@ -319,11 +321,16 @@ _homeserver_whitelist: Optional[Pattern[str]] = None # Util methods -Sentinel = object() + +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup. + sentinel = object() P = ParamSpec("P") R = TypeVar("R") +T = TypeVar("T") def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: @@ -339,22 +346,43 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message, ret=None): +@overload +def ensure_active_span( + message: str, +) -> Callable[[Callable[P, R]], Callable[P, Optional[R]]]: + ... + + +@overload +def ensure_active_span( + message: str, ret: T +) -> Callable[[Callable[P, R]], Callable[P, Union[T, R]]]: + ... + + +def ensure_active_span( + message: str, ret: Optional[T] = None +) -> Callable[[Callable[P, R]], Callable[P, Union[Optional[T], R]]]: """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: - message (str): Message which fills in "There was no active span when trying to %s" + message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. - ret (object): return value if opentracing is None or there is no active span. + ret: return value if opentracing is None or there is no active span. - Returns (object): The result of the func or ret if opentracing is disabled or there + Returns: + The result of the func, falling back to ret if opentracing is disabled or there was no active span. """ - def ensure_active_span_inner_1(func): + def ensure_active_span_inner_1( + func: Callable[P, R] + ) -> Callable[P, Union[Optional[T], R]]: @wraps(func) - def ensure_active_span_inner_2(*args, **kwargs): + def ensure_active_span_inner_2( + *args: P.args, **kwargs: P.kwargs + ) -> Union[Optional[T], R]: if not opentracing: return ret @@ -402,7 +430,7 @@ def init_tracer(hs: "HomeServer") -> None: config = JaegerConfig( config=hs.config.tracing.jaeger_config, service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", - scope_manager=LogContextScopeManager(hs.config), + scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) @@ -451,16 +479,16 @@ def whitelisted_homeserver(destination: str) -> bool: # Could use kwargs but I want these to be explicit def start_active_span( - operation_name, - child_of=None, - references=None, - tags=None, - start_time=None, - ignore_active_span=False, - finish_on_close=True, + operation_name: str, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, + references: Optional[List["opentracing.Reference"]] = None, + tags: Optional[Dict[str, str]] = None, + start_time: Optional[float] = None, + ignore_active_span: bool = False, + finish_on_close: bool = True, *, - tracer=None, -): + tracer: Optional["opentracing.Tracer"] = None, +) -> "opentracing.Scope": """Starts an active opentracing span. Records the start time for the span, and sets it as the "active span" in the @@ -493,12 +521,12 @@ def start_active_span( def start_active_span_follows_from( operation_name: str, contexts: Collection, - child_of=None, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, start_time: Optional[float] = None, *, - inherit_force_tracing=False, - tracer=None, -): + inherit_force_tracing: bool = False, + tracer: Optional["opentracing.Tracer"] = None, +) -> "opentracing.Scope": """Starts an active opentracing span, with additional references to previous spans Args: @@ -540,7 +568,7 @@ def start_active_span_from_edu( edu_content: Dict[str, Any], operation_name: str, references: Optional[List["opentracing.Reference"]] = None, - tags: Optional[Dict] = None, + tags: Optional[Dict[str, str]] = None, start_time: Optional[float] = None, ignore_active_span: bool = False, finish_on_close: bool = True, @@ -617,23 +645,27 @@ def set_operation_name(operation_name: str) -> None: @only_if_tracing -def force_tracing(span=Sentinel) -> None: +def force_tracing( + span: Union["opentracing.Span", _Sentinel] = _Sentinel.sentinel +) -> None: """Force sampling for the active/given span and its children. Args: span: span to force tracing for. By default, the active span. """ - if span is Sentinel: - span = opentracing.tracer.active_span - if span is None: + if isinstance(span, _Sentinel): + span_to_trace = opentracing.tracer.active_span + else: + span_to_trace = span + if span_to_trace is None: logger.error("No active span in force_tracing") return - span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) + span_to_trace.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) # also set a bit of baggage, so that we have a way of figuring out if # it is enabled later - span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") + span_to_trace.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") def is_context_forced_tracing( @@ -709,7 +741,9 @@ def inject_response_headers(response_headers: Headers) -> None: response_headers.addRawHeader("Synapse-Trace-Id", f"{trace_id:x}") -@ensure_active_span("get the active span context as a dict", ret={}) +@ensure_active_span( + "get the active span context as a dict", ret=cast(Dict[str, str], {}) +) def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str]: """ Gets a span context as a dict. This can be used instead of manually @@ -789,44 +823,42 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte # Tracing decorators -def trace(func=None, opname=None): +def trace_with_opname(opname: str) -> Callable[[Callable[P, R]], Callable[P, R]]: """ - Decorator to trace a function. - Sets the operation name to that of the function's or that given - as operation_name. See the module's doc string for usage - examples. + Decorator to trace a function with a custom opname. + + See the module's doc string for usage examples. + """ - def decorator(func): + def decorator(func: Callable[P, R]) -> Callable[P, R]: if opentracing is None: return func # type: ignore[unreachable] - _opname = opname if opname else func.__name__ - if inspect.iscoroutinefunction(func): @wraps(func) - async def _trace_inner(*args, **kwargs): - with start_active_span(_opname): - return await func(*args, **kwargs) + async def _trace_inner(*args: P.args, **kwargs: P.kwargs) -> R: + with start_active_span(opname): + return await func(*args, **kwargs) # type: ignore[misc] else: # The other case here handles both sync functions and those # decorated with inlineDeferred. @wraps(func) - def _trace_inner(*args, **kwargs): - scope = start_active_span(_opname) + def _trace_inner(*args: P.args, **kwargs: P.kwargs) -> R: + scope = start_active_span(opname) scope.__enter__() try: result = func(*args, **kwargs) if isinstance(result, defer.Deferred): - def call_back(result): + def call_back(result: R) -> R: scope.__exit__(None, None, None) return result - def err_back(result): + def err_back(result: R) -> R: scope.__exit__(None, None, None) return result @@ -849,12 +881,21 @@ def trace(func=None, opname=None): scope.__exit__(type(e), None, e.__traceback__) raise - return _trace_inner + return _trace_inner # type: ignore[return-value] - if func: - return decorator(func) - else: - return decorator + return decorator + + +def trace(func: Callable[P, R]) -> Callable[P, R]: + """ + Decorator to trace a function. + + Sets the operation name to that of the function's name. + + See the module's doc string for usage examples. + """ + + return trace_with_opname(func.__name__)(func) def tag_args(func: Callable[P, R]) -> Callable[P, R]: @@ -869,9 +910,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: def _tag_args_inner(*args: P.args, **kwargs: P.kwargs) -> R: argspec = inspect.getfullargspec(func) for i, arg in enumerate(argspec.args[1:]): - set_tag("ARG_" + arg, args[i]) # type: ignore[index] - set_tag("args", args[len(argspec.args) :]) # type: ignore[index] - set_tag("kwargs", kwargs) + set_tag("ARG_" + arg, str(args[i])) # type: ignore[index] + set_tag("args", str(args[len(argspec.args) :])) # type: ignore[index] + set_tag("kwargs", str(kwargs)) return func(*args, **kwargs) return _tag_args_inner diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index a26a1a58e7..10877bdfc5 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -16,11 +16,15 @@ import logging from types import TracebackType from typing import Optional, Type -from opentracing import Scope, ScopeManager +from opentracing import Scope, ScopeManager, Span import twisted -from synapse.logging.context import current_context, nested_logging_context +from synapse.logging.context import ( + LoggingContext, + current_context, + nested_logging_context, +) logger = logging.getLogger(__name__) @@ -35,11 +39,11 @@ class LogContextScopeManager(ScopeManager): but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301. """ - def __init__(self, config): + def __init__(self) -> None: pass @property - def active(self): + def active(self) -> Optional[Scope]: """ Returns the currently active Scope which can be used to access the currently active Scope.span. @@ -48,19 +52,18 @@ class LogContextScopeManager(ScopeManager): Tracer.start_active_span() time. Return: - (Scope) : the Scope that is active, or None if not - available. + The Scope that is active, or None if not available. """ ctx = current_context() return ctx.scope - def activate(self, span, finish_on_close): + def activate(self, span: Span, finish_on_close: bool) -> Scope: """ Makes a Span active. Args - span (Span): the span that should become active. - finish_on_close (Boolean): whether Span should be automatically - finished when Scope.close() is called. + span: the span that should become active. + finish_on_close: whether Span should be automatically finished when + Scope.close() is called. Returns: Scope to control the end of the active period for @@ -112,8 +115,8 @@ class _LogContextScope(Scope): def __init__( self, manager: LogContextScopeManager, - span, - logcontext, + span: Span, + logcontext: LoggingContext, enter_logcontext: bool, finish_on_close: bool, ): @@ -121,13 +124,13 @@ class _LogContextScope(Scope): Args: manager: the manager that is responsible for this scope. - span (Span): + span: the opentracing span which this scope represents the local lifetime for. - logcontext (LogContext): - the logcontext to which this scope is attached. + logcontext: + the log context to which this scope is attached. enter_logcontext: - if True the logcontext will be exited when the scope is finished + if True the log context will be exited when the scope is finished finish_on_close: if True finish the span when the scope is closed """ diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index eef3462e10..7a1516d3a8 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -235,7 +235,7 @@ def run_as_background_process( f"bgproc.{desc}", tags={SynapseTags.REQUEST_ID: str(context)} ) else: - ctx = nullcontext() + ctx = nullcontext() # type: ignore[assignment] with ctx: return await func(*args, **kwargs) except Exception: diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 6191c2dc96..6d8bf54083 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from typing_extensions import ParamSpec from twisted.internet import defer from twisted.web.resource import Resource +from synapse.api import errors from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( diff --git a/synapse/notifier.py b/synapse/notifier.py index 54b0ec4b97..c42bb8266a 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -228,6 +228,7 @@ class Notifier: # Called when there are new things to stream over replication self.replication_callbacks: List[Callable[[], None]] = [] + self._new_join_in_room_callbacks: List[Callable[[str, str], None]] = [] self._federation_client = hs.get_federation_http_client() @@ -280,6 +281,19 @@ class Notifier: """ self.replication_callbacks.append(cb) + def add_new_join_in_room_callback(self, cb: Callable[[str, str], None]) -> None: + """Add a callback that will be called when a user joins a room. + + This only fires on genuine membership changes, e.g. "invite" -> "join". + Membership transitions like "join" -> "join" (for e.g. displayname changes) do + not trigger the callback. + + When called, the callback receives two arguments: the event ID and the room ID. + It should *not* return a Deferred - if it needs to do any asynchronous work, a + background thread should be started and wrapped with run_as_background_process. + """ + self._new_join_in_room_callbacks.append(cb) + async def on_new_room_event( self, event: EventBase, @@ -723,6 +737,10 @@ class Notifier: for cb in self.replication_callbacks: cb() + def notify_user_joined_room(self, event_id: str, room_id: str) -> None: + for cb in self._new_join_in_room_callbacks: + cb(event_id, room_id) + def notify_remote_server_up(self, server: str) -> None: """Notify any replication that a remote server has come back up""" # We call federation_sender directly rather than registering as a diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 819bc9e9b6..6c0cc5a6ce 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -290,7 +290,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "key": "type", "pattern": "m.room.server_acl", "_cache_key": "_room_server_acl", - } + }, + { + "kind": "event_match", + "key": "state_key", + "pattern": "", + "_cache_key": "_room_server_acl_state_key", + }, ], "actions": [], }, diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7791b289e2..713dcf6950 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -17,7 +17,6 @@ import itertools import logging from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union -import attr from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes @@ -26,13 +25,11 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership -from synapse.util.async_helpers import Linearizer -from synapse.util.caches import CacheMetric, register_cache -from synapse.util.caches.descriptors import lru_cache -from synapse.util.caches.lrucache import LruCache +from synapse.storage.state import StateFilter +from synapse.util.caches import register_cache from synapse.util.metrics import measure_func +from synapse.visibility import filter_event_for_clients_with_state -from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -48,15 +45,6 @@ push_rules_state_size_counter = Counter( "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "" ) -# Measures whether we use the fast path of using state deltas, or if we have to -# recalculate from scratch -push_rules_delta_state_cache_metric = register_cache( - "cache", - "push_rules_delta_state_cache_metric", - cache=[], # Meaningless size, as this isn't a cache that stores values - resizable=False, -) - STATE_EVENT_TYPES_TO_MARK_UNREAD = { EventTypes.Topic, @@ -111,10 +99,6 @@ class BulkPushRuleEvaluator: self.clock = hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() - # Used by `RulesForRoom` to ensure only one thing mutates the cache at a - # time. Keyed off room_id. - self._rules_linearizer = Linearizer(name="rules_for_room") - self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", @@ -126,47 +110,54 @@ class BulkPushRuleEvaluator: self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled async def _get_rules_for_event( - self, event: EventBase, context: EventContext + self, + event: EventBase, ) -> Dict[str, List[Dict[str, Any]]]: - """This gets the rules for all users in the room at the time of the event, - as well as the push rules for the invitee if the event is an invite. + """Get the push rules for all users who may need to be notified about + the event. + + Note: this does not check if the user is allowed to see the event. Returns: - dict of user_id -> push_rules + Mapping of user ID to their push rules. """ - room_id = event.room_id - - rules_for_room_data = self._get_rules_for_room(room_id) - rules_for_room = RulesForRoom( - hs=self.hs, - room_id=room_id, - rules_for_room_cache=self._get_rules_for_room.cache, - room_push_rule_cache_metrics=self.room_push_rule_cache_metrics, - linearizer=self._rules_linearizer, - cached_data=rules_for_room_data, - ) - - rules_by_user = await rules_for_room.get_rules(event, context) + # We get the users who may need to be notified by first fetching the + # local users currently in the room, finding those that have push rules, + # and *then* checking which users are actually allowed to see the event. + # + # The alternative is to first fetch all users that were joined at the + # event, but that requires fetching the full state at the event, which + # may be expensive for large rooms with few local users. + + local_users = await self.store.get_local_users_in_room(event.room_id) + + # Filter out appservice users. + local_users = [ + u + for u in local_users + if not self.store.get_if_app_services_interested_in_user(u) + ] # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited - if event.type == "m.room.member" and event.content["membership"] == "invite": + if event.type == EventTypes.Member and event.membership == Membership.INVITE: invited = event.state_key - if invited and self.hs.is_mine_id(invited): - rules_by_user = dict(rules_by_user) - rules_by_user[invited] = await self.store.get_push_rules_for_user( - invited - ) + if invited and self.hs.is_mine_id(invited) and invited not in local_users: + local_users = list(local_users) + local_users.append(invited) - return rules_by_user + rules_by_user = await self.store.bulk_get_push_rules(local_users) - @lru_cache() - def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": - """Get the current RulesForRoomData object for the given room id""" - # It's important that the RulesForRoomData object gets added to self._get_rules_for_room.cache - # before any lookup methods get called on it as otherwise there may be - # a race if invalidate_all gets called (which assumes its in the cache) - return RulesForRoomData() + logger.debug("Users in room: %s", local_users) + + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Returning push rules for %r %r", + event.room_id, + list(rules_by_user.keys()), + ) + + return rules_by_user async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext @@ -262,10 +253,12 @@ class BulkPushRuleEvaluator: count_as_unread = _should_count_as_unread(event, context) - rules_by_user = await self._get_rules_for_event(event, context) + rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - room_members = await self.store.get_joined_users_from_context(event, context) + room_member_count = await self.store.get_number_joined_users_in_room( + event.room_id + ) ( power_levels, @@ -278,30 +271,36 @@ class BulkPushRuleEvaluator: evaluator = PushRuleEvaluatorForEvent( event, - len(room_members), + room_member_count, sender_power_level, power_levels, relations, self._relations_match_enabled, ) - # If the event is not a state event check if any users ignore the sender. - if not event.is_state(): - ignorers = await self.store.ignored_by(event.sender) - else: - ignorers = frozenset() + users = rules_by_user.keys() + profiles = await self.store.get_subset_users_in_room_with_profiles( + event.room_id, users + ) + + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed event + # from before the user joined, which they should not be pushed for + uids_with_visibility = await filter_event_for_clients_with_state( + self.store, users, event, context + ) for uid, rules in rules_by_user.items(): if event.sender == uid: continue - if uid in ignorers: + if uid not in uids_with_visibility: continue display_name = None - profile_info = room_members.get(uid) - if profile_info: - display_name = profile_info.display_name + profile = profiles.get(uid) + if profile: + display_name = profile.display_name if not display_name: # Handle the case where we are pushing a membership event to @@ -346,283 +345,3 @@ MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] - - -@attr.s(slots=True, auto_attribs=True) -class RulesForRoomData: - """The data stored in the cache by `RulesForRoom`. - - We don't store `RulesForRoom` directly in the cache as we want our caches to - *only* include data, and not references to e.g. the data stores. - """ - - # event_id -> EventIdMembership - member_map: MemberMap = attr.Factory(dict) - # user_id -> rules - rules_by_user: RulesByUser = attr.Factory(dict) - - # The last state group we updated the caches for. If the state_group of - # a new event comes along, we know that we can just return the cached - # result. - # On invalidation of the rules themselves (if the user changes them), - # we invalidate everything and set state_group to `object()` - state_group: StateGroup = attr.Factory(object) - - # A sequence number to keep track of when we're allowed to update the - # cache. We bump the sequence number when we invalidate the cache. If - # the sequence number changes while we're calculating stuff we should - # not update the cache with it. - sequence: int = 0 - - # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. (I.e. users we will never need to - # calculate push for) - # These never need to be invalidated as we will never set up push for - # them. - uninteresting_user_set: Set[str] = attr.Factory(set) - - -class RulesForRoom: - """Caches push rules for users in a room. - - This efficiently handles users joining/leaving the room by not invalidating - the entire cache for the room. - - A new instance is constructed for each call to - `BulkPushRuleEvaluator._get_rules_for_event`, with the cached data from - previous calls passed in. - """ - - def __init__( - self, - hs: "HomeServer", - room_id: str, - rules_for_room_cache: LruCache, - room_push_rule_cache_metrics: CacheMetric, - linearizer: Linearizer, - cached_data: RulesForRoomData, - ): - """ - Args: - hs: The HomeServer object. - room_id: The room ID. - rules_for_room_cache: The cache object that caches these - RoomsForUser objects. - room_push_rule_cache_metrics: The metrics object - linearizer: The linearizer used to ensure only one thing mutates - the cache at a time. Keyed off room_id - cached_data: Cached data from previous calls to `self.get_rules`, - can be mutated. - """ - self.room_id = room_id - self.is_mine_id = hs.is_mine_id - self.store = hs.get_datastores().main - self.room_push_rule_cache_metrics = room_push_rule_cache_metrics - - # Used to ensure only one thing mutates the cache at a time. Keyed off - # room_id. - self.linearizer = linearizer - - self.data = cached_data - - # We need to be clever on the invalidating caches callbacks, as - # otherwise the invalidation callback holds a reference to the object, - # potentially causing it to leak. - # To get around this we pass a function that on invalidations looks ups - # the RoomsForUser entry in the cache, rather than keeping a reference - # to self around in the callback. - self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) - - async def get_rules( - self, event: EventBase, context: EventContext - ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event context return the rules for all users who are - currently in the room. - """ - state_group = context.state_group - - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - async with self.linearizer.queue(self.room_id): - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - self.room_push_rule_cache_metrics.inc_misses() - - ret_rules_by_user = {} - missing_member_event_ids = {} - if state_group and self.data.state_group == context.prev_group: - # If we have a simple delta then we can reuse most of the previous - # results. - ret_rules_by_user = self.data.rules_by_user - current_state_ids = context.delta_ids - - push_rules_delta_state_cache_metric.inc_hits() - else: - current_state_ids = await context.get_current_state_ids() - push_rules_delta_state_cache_metric.inc_misses() - # Ensure the state IDs exist. - assert current_state_ids is not None - - push_rules_state_size_counter.inc(len(current_state_ids)) - - logger.debug( - "Looking for member changes in %r %r", state_group, current_state_ids - ) - - # Loop through to see which member events we've seen and have rules - # for and which we need to fetch - for key in current_state_ids: - typ, user_id = key - if typ != EventTypes.Member: - continue - - if user_id in self.data.uninteresting_user_set: - continue - - if not self.is_mine_id(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - event_id = current_state_ids[key] - - res = self.data.member_map.get(event_id, None) - if res: - if res.membership == Membership.JOIN: - rules = self.data.rules_by_user.get(res.user_id, None) - if rules: - ret_rules_by_user[res.user_id] = rules - continue - - # If a user has left a room we remove their push rule. If they - # joined then we re-add it later in _update_rules_with_member_event_ids - ret_rules_by_user.pop(user_id, None) - missing_member_event_ids[user_id] = event_id - - if missing_member_event_ids: - # If we have some member events we haven't seen, look them up - # and fetch push rules for them if appropriate. - logger.debug("Found new member events %r", missing_member_event_ids) - await self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group, event - ) - else: - # The push rules didn't change but lets update the cache anyway - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Returning push rules for %r %r", self.room_id, ret_rules_by_user.keys() - ) - return ret_rules_by_user - - async def _update_rules_with_member_event_ids( - self, - ret_rules_by_user: Dict[str, list], - member_event_ids: Dict[str, str], - state_group: Optional[int], - event: EventBase, - ) -> None: - """Update the partially filled rules_by_user dict by fetching rules for - any newly joined users in the `member_event_ids` list. - - Args: - ret_rules_by_user: Partially filled dict of push rules. Gets - updated with any new rules. - member_event_ids: Dict of user id to event id for membership events - that have happened since the last time we filled rules_by_user - state_group: The state group we are currently computing push rules - for. Used when updating the cache. - event: The event we are currently computing push rules for. - """ - sequence = self.data.sequence - - members = await self.store.get_membership_from_event_ids( - member_event_ids.values() - ) - - # If the event is a join event then it will be in current state events - # map but not in the DB, so we have to explicitly insert it. - if event.type == EventTypes.Member: - for event_id in member_event_ids.values(): - if event_id == event.event_id: - members[event_id] = EventIdMembership( - user_id=event.state_key, membership=event.membership - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Found members %r: %r", self.room_id, members.values()) - - joined_user_ids = { - entry.user_id - for entry in members.values() - if entry and entry.membership == Membership.JOIN - } - - logger.debug("Joined: %r", joined_user_ids) - - # Previously we only considered users with pushers or read receipts in that - # room. We can't do this anymore because we use push actions to calculate unread - # counts, which don't rely on the user having pushers or sent a read receipt into - # the room. Therefore we just need to filter for local users here. - user_ids = list(filter(self.is_mine_id, joined_user_ids)) - - rules_by_user = await self.store.bulk_get_push_rules( - user_ids, on_invalidate=self.invalidate_all_cb - ) - - ret_rules_by_user.update( - item for item in rules_by_user.items() if item[0] is not None - ) - - self.update_cache(sequence, members, ret_rules_by_user, state_group) - - def update_cache( - self, - sequence: int, - members: MemberMap, - rules_by_user: RulesByUser, - state_group: StateGroup, - ) -> None: - if sequence == self.data.sequence: - self.data.member_map.update(members) - self.data.rules_by_user = rules_by_user - self.data.state_group = state_group - - -@attr.attrs(slots=True, frozen=True, auto_attribs=True) -class _Invalidation: - # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules, - # which means that it it is stored on the bulk_get_push_rules cache entry. In order - # to ensure that we don't accumulate lots of redundant callbacks on the cache entry, - # we need to ensure that two _Invalidation objects are "equal" if they refer to the - # same `cache` and `room_id`. - # - # attrs provides suitable __hash__ and __eq__ methods, provided we remember to - # set `frozen=True`. - - cache: LruCache - room_id: str - - def __call__(self) -> None: - rules_data = self.cache.get(self.room_id, None, update_metrics=False) - if rules_data: - rules_data.sequence += 1 - rules_data.state_group = object() - rules_data.member_map = {} - rules_data.rules_by_user = {} - push_rules_invalidation_counter.inc() diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index d0cc657b44..1e0ef44fc7 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -328,7 +328,7 @@ class PusherPool: return None try: - p = self.pusher_factory.create_pusher(pusher_config) + pusher = self.pusher_factory.create_pusher(pusher_config) except PusherConfigException as e: logger.warning( "Pusher incorrectly configured id=%i, user=%s, appid=%s, pushkey=%s: %s", @@ -346,23 +346,28 @@ class PusherPool: ) return None - if not p: + if not pusher: return None - appid_pushkey = "%s:%s" % (pusher_config.app_id, pusher_config.pushkey) + appid_pushkey = "%s:%s" % (pusher.app_id, pusher.pushkey) - byuser = self.pushers.setdefault(pusher_config.user_name, {}) + byuser = self.pushers.setdefault(pusher.user_id, {}) if appid_pushkey in byuser: - byuser[appid_pushkey].on_stop() - byuser[appid_pushkey] = p + previous_pusher = byuser[appid_pushkey] + previous_pusher.on_stop() - synapse_pushers.labels(type(p).__name__, p.app_id).inc() + synapse_pushers.labels( + type(previous_pusher).__name__, previous_pusher.app_id + ).dec() + byuser[appid_pushkey] = pusher + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).inc() # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to # push. - user_id = pusher_config.user_name - last_stream_ordering = pusher_config.last_stream_ordering + user_id = pusher.user_id + last_stream_ordering = pusher.last_stream_ordering if last_stream_ordering: have_notifs = await self.store.get_if_maybe_push_in_range_for_user( user_id, last_stream_ordering @@ -372,9 +377,9 @@ class PusherPool: # risk missing push. have_notifs = True - p.on_started(have_notifs) + pusher.on_started(have_notifs) - return p + return pusher async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: appid_pushkey = "%s:%s" % (app_id, pushkey) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index aec040ee19..53aa7fa4c6 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -25,6 +25,7 @@ from synapse.replication.http import ( push, register, send_event, + state, streams, ) @@ -48,6 +49,7 @@ class ReplicationRestResource(JsonResource): streams.register_servlets(hs, self) account_data.register_servlets(hs, self) push.register_servlets(hs, self) + state.register_servlets(hs, self) # The following can't currently be instantiated on workers. if hs.config.worker.worker_app is None: diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index a4ae4040c3..561ad5bf04 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -29,7 +29,7 @@ from synapse.http import RequestTimedOutError from synapse.http.server import HttpServer, is_method_cancellable from synapse.http.site import SynapseRequest from synapse.logging import opentracing -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import trace_with_opname from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string @@ -196,7 +196,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): "ascii" ) - @trace(opname="outgoing_replication_request") + @trace_with_opname("outgoing_replication_request") async def send_request(*, instance_name: str = "master", **kwargs: Any) -> Any: with outgoing_gauge.track_inprogress(): if instance_name == local_instance_name: diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index eed29cd597..d3abafed28 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -60,6 +60,9 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): { "max_stream_id": 32443, } + + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. """ NAME = "fed_send_events" diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index c2b2588ea5..486f04723c 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -59,6 +59,9 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): { "stream_id": 12345, "event_id": "$abcdef..." } + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. + The returned event ID may not match the sent event if it was deduplicated. """ diff --git a/synapse/replication/http/state.py b/synapse/replication/http/state.py new file mode 100644 index 0000000000..838b7584e5 --- /dev/null +++ b/synapse/replication/http/state.py @@ -0,0 +1,75 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, Tuple + +from twisted.web.server import Request + +from synapse.api.errors import SynapseError +from synapse.http.server import HttpServer +from synapse.replication.http._base import ReplicationEndpoint +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ReplicationUpdateCurrentStateRestServlet(ReplicationEndpoint): + """Recalculates the current state for a room, and persists it. + + The API looks like: + + POST /_synapse/replication/update_current_state/:room_id + + {} + + 200 OK + + {} + """ + + NAME = "update_current_state" + PATH_ARGS = ("room_id",) + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + + self._state_handler = hs.get_state_handler() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + @staticmethod + async def _serialize_payload(room_id: str) -> JsonDict: # type: ignore[override] + return {} + + async def _handle_request( # type: ignore[override] + self, request: Request, room_id: str + ) -> Tuple[int, JsonDict]: + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + raise SynapseError( + 400, "/update_current_state request was routed to the wrong worker" + ) + + await self._state_handler.update_current_state(room_id) + + return 200, {} + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + if hs.get_instance_name() in hs.config.worker.writers.events: + ReplicationUpdateCurrentStateRestServlet(hs).register(http_server) diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py deleted file mode 100644 index 7644146dba..0000000000 --- a/synapse/replication/slave/storage/_base.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import TYPE_CHECKING, Optional - -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection -from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore -from synapse.storage.engines import PostgresEngine -from synapse.storage.util.id_generators import MultiWriterIdGenerator - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class BaseSlavedStore(CacheInvalidationWorkerStore): - def __init__( - self, - database: DatabasePool, - db_conn: LoggingDatabaseConnection, - hs: "HomeServer", - ): - super().__init__(database, db_conn, hs) - if isinstance(self.database_engine, PostgresEngine): - self._cache_id_gen: Optional[ - MultiWriterIdGenerator - ] = MultiWriterIdGenerator( - db_conn, - database, - stream_name="caches", - instance_name=hs.get_instance_name(), - tables=[ - ( - "cache_invalidation_stream_by_instance", - "instance_name", - "stream_id", - ) - ], - sequence_name="cache_invalidation_stream_seq", - writers=[], - ) - else: - self._cache_id_gen = None - - self.hs = hs diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py deleted file mode 100644 index ee74ee7d85..0000000000 --- a/synapse/replication/slave/storage/account_data.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.storage.databases.main.account_data import AccountDataWorkerStore -from synapse.storage.databases.main.tags import TagsWorkerStore - - -class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py deleted file mode 100644 index 29f50c0add..0000000000 --- a/synapse/replication/slave/storage/appservice.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.appservice import ( - ApplicationServiceTransactionWorkerStore, - ApplicationServiceWorkerStore, -) - - -class SlavedApplicationServiceStore( - ApplicationServiceTransactionWorkerStore, ApplicationServiceWorkerStore -): - pass diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py deleted file mode 100644 index e940751084..0000000000 --- a/synapse/replication/slave/storage/deviceinbox.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore - - -class SlavedDeviceInboxStore(DeviceInboxWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index a48cc02069..6fcade510a 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -14,7 +14,6 @@ from typing import TYPE_CHECKING, Any, Iterable -from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream from synapse.storage.database import DatabasePool, LoggingDatabaseConnection @@ -24,7 +23,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): +class SlavedDeviceStore(DeviceWorkerStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py deleted file mode 100644 index 71fde0c96c..0000000000 --- a/synapse/replication/slave/storage/directory.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.directory import DirectoryWorkerStore - -from ._base import BaseSlavedStore - - -class DirectoryStore(DirectoryWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index a72dad7464..fe47778cb1 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -29,8 +29,6 @@ from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore - if TYPE_CHECKING: from synapse.server import HomeServer @@ -56,7 +54,6 @@ class SlavedEventStore( EventsWorkerStore, UserErasureWorkerStore, RelationsWorkerStore, - BaseSlavedStore, ): def __init__( self, diff --git a/synapse/replication/slave/storage/filtering.py b/synapse/replication/slave/storage/filtering.py index 4d185e2b56..c52679cd60 100644 --- a/synapse/replication/slave/storage/filtering.py +++ b/synapse/replication/slave/storage/filtering.py @@ -14,16 +14,15 @@ from typing import TYPE_CHECKING +from synapse.storage._base import SQLBaseStore from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.filtering import FilteringStore -from ._base import BaseSlavedStore - if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedFilteringStore(BaseSlavedStore): +class SlavedFilteringStore(SQLBaseStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py deleted file mode 100644 index 99f4a22642..0000000000 --- a/synapse/replication/slave/storage/profile.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.storage.databases.main.profile import ProfileWorkerStore - - -class SlavedProfileStore(ProfileWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index de642bba71..44ed20e424 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -18,14 +18,13 @@ from synapse.replication.tcp.streams import PushersStream from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.pusher import PusherWorkerStore -from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): +class SlavedPusherStore(PusherWorkerStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py deleted file mode 100644 index 3826b87dec..0000000000 --- a/synapse/replication/slave/storage/receipts.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.receipts import ReceiptsWorkerStore - -from ._base import BaseSlavedStore - - -class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py deleted file mode 100644 index 5dae35a960..0000000000 --- a/synapse/replication/slave/storage/registration.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.registration import RegistrationWorkerStore - -from ._base import BaseSlavedStore - - -class SlavedRegistrationStore(RegistrationWorkerStore, BaseSlavedStore): - pass diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2f59245058..e4f2201c92 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -21,7 +21,7 @@ from twisted.internet.interfaces import IAddress, IConnector from twisted.internet.protocol import ReconnectingClientFactory from twisted.python.failure import Failure -from synapse.api.constants import EventTypes, ReceiptTypes +from synapse.api.constants import EventTypes, Membership, ReceiptTypes from synapse.federation import send_queue from synapse.federation.sender import FederationSender from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable @@ -219,6 +219,21 @@ class ReplicationDataHandler: membership=row.data.membership, ) + # If this event is a join, make a note of it so we have an accurate + # cross-worker room rate limit. + # TODO: Erik said we should exclude rows that came from ex_outliers + # here, but I don't see how we can determine that. I guess we could + # add a flag to row.data? + if ( + row.data.type == EventTypes.Member + and row.data.membership == Membership.JOIN + and not row.data.outlier + ): + # TODO retrieve the previous state, and exclude join -> join transitions + self.notifier.notify_user_joined_room( + row.data.event_id, row.data.room_id + ) + await self._presence_handler.process_replication_rows( stream_name, instance_name, token, rows ) diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 26f4fa7cfd..14b6705862 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -98,6 +98,7 @@ class EventsStreamEventRow(BaseEventsStreamRow): relates_to: Optional[str] membership: Optional[str] rejected: bool + outlier: bool @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index f0614a2897..ba2f7fa6d8 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -373,6 +373,7 @@ class UserRestServletV2(RestServlet): if ( self.hs.config.email.email_enable_notifs and self.hs.config.email.email_notif_for_new_users + and medium == "email" ): await self.pusher_pool.add_pusher( user_id=user_id, diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index bdc4a9c068..0cc87a4001 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -28,7 +28,6 @@ from synapse.api.errors import ( SynapseError, ThreepidValidationError, ) -from synapse.config.emailconfig import ThreepidBehaviour from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( @@ -64,7 +63,7 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): self.config = hs.config self.identity_handler = hs.get_identity_handler() - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -73,11 +72,10 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User password resets have been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User password resets have been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based password resets have been disabled on this server" ) @@ -129,35 +127,21 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send password reset emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_password_reset_mail, - next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + # Send password reset emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_password_reset_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="password_reset").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class PasswordRestServlet(RestServlet): @@ -349,7 +333,7 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.store = self.hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -358,11 +342,10 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" + ) raise SynapseError( 400, "Adding an email to your account is disabled on this server" ) @@ -413,35 +396,20 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send threepid validation emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_add_threepid_mail, - next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_add_threepid_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="add_threepid").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnThreepidRequestTokenRestServlet(RestServlet): @@ -534,25 +502,18 @@ class AddThreepidEmailSubmitTokenServlet(RestServlet): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_add_threepid_template_failure_html ) async def on_GET(self, request: Request) -> None: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) - raise SynapseError( - 400, "Adding an email to your account is disabled on this server" + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" ) - elif self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: raise SynapseError( - 400, - "This homeserver is not validating threepids. Use an identity server " - "instead.", + 400, "Adding an email to your account is disabled on this server" ) sid = parse_string(request, "sid", required=True) @@ -743,10 +704,12 @@ class ThreepidBindRestServlet(RestServlet): async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["id_server", "sid", "client_secret"]) + assert_params_in_dict( + body, ["id_server", "sid", "id_access_token", "client_secret"] + ) id_server = body["id_server"] sid = body["sid"] - id_access_token = body.get("id_access_token") # optional + id_access_token = body["id_access_token"] client_secret = body["client_secret"] assert_valid_client_secret(client_secret) diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py index bfe985939b..f13970b898 100644 --- a/synapse/rest/client/account_data.py +++ b/synapse/rest/client/account_data.py @@ -15,11 +15,11 @@ import logging from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import AuthError, NotFoundError, SynapseError +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.types import JsonDict +from synapse.types import JsonDict, RoomID from ._base import client_patterns @@ -104,6 +104,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + body = parse_json_object_from_request(request) if account_data_type == "m.fully_read": @@ -111,6 +118,7 @@ class RoomAccountDataServlet(RestServlet): 405, "Cannot set m.fully_read through this API." " Use /rooms/!roomId:server.name/read_markers", + Codes.BAD_JSON, ) await self.handler.add_account_data_to_room( @@ -130,6 +138,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + event = await self.store.get_account_data_for_room_and_type( user_id, room_id, account_data_type ) diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py index e181a0dde2..bc1b18c92d 100644 --- a/synapse/rest/client/directory.py +++ b/synapse/rest/client/directory.py @@ -17,13 +17,7 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request -from synapse.api.errors import ( - AuthError, - Codes, - InvalidClientCredentialsError, - NotFoundError, - SynapseError, -) +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -52,6 +46,8 @@ class ClientDirectoryServer(RestServlet): self.auth = hs.get_auth() async def on_GET(self, request: Request, room_alias: str) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) res = await self.directory_handler.get_association(room_alias_obj) @@ -61,6 +57,8 @@ class ClientDirectoryServer(RestServlet): async def on_PUT( self, request: SynapseRequest, room_alias: str ) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) content = parse_json_object_from_request(request) @@ -95,31 +93,30 @@ class ClientDirectoryServer(RestServlet): async def on_DELETE( self, request: SynapseRequest, room_alias: str ) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) + requester = await self.auth.get_user_by_req(request) - try: - service = self.auth.get_appservice_by_req(request) + if requester.app_service: await self.directory_handler.delete_appservice_association( - service, room_alias_obj + requester.app_service, room_alias_obj ) + logger.info( "Application service at %s deleted alias %s", - service.url, + requester.app_service.url, room_alias_obj.to_string(), ) - return 200, {} - except InvalidClientCredentialsError: - # fallback to default user behaviour if they aren't an AS - pass - requester = await self.auth.get_user_by_req(request) - user = requester.user + else: + await self.directory_handler.delete_association(requester, room_alias_obj) - await self.directory_handler.delete_association(requester, room_alias_obj) - - logger.info( - "User %s deleted alias %s", user.to_string(), room_alias_obj.to_string() - ) + logger.info( + "User %s deleted alias %s", + requester.user.to_string(), + room_alias_obj.to_string(), + ) return 200, {} @@ -154,17 +151,6 @@ class ClientDirectoryListServer(RestServlet): return 200, {} - async def on_DELETE( - self, request: SynapseRequest, room_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - - await self.directory_handler.edit_published_room_list( - requester, room_id, "private" - ) - - return 200, {} - class ClientAppserviceDirectoryListServer(RestServlet): PATTERNS = client_patterns( diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index ce806e3c11..e3f454896a 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -26,7 +26,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import log_kv, set_tag, trace +from synapse.logging.opentracing import log_kv, set_tag, trace_with_opname from synapse.types import JsonDict, StreamToken from ._base import client_patterns, interactive_auth_handler @@ -71,7 +71,7 @@ class KeyUploadServlet(RestServlet): self.e2e_keys_handler = hs.get_e2e_keys_handler() self.device_handler = hs.get_device_handler() - @trace(opname="upload_keys") + @trace_with_opname("upload_keys") async def on_POST( self, request: SynapseRequest, device_id: Optional[str] ) -> Tuple[int, JsonDict]: @@ -208,7 +208,9 @@ class KeyChangesServlet(RestServlet): # We want to enforce they do pass us one, but we ignore it and return # changes after the "to" as well as before. - set_tag("to", parse_string(request, "to")) + # + # XXX This does not enforce that "to" is passed. + set_tag("to", str(parse_string(request, "to"))) from_token = await StreamToken.from_string(self.store, from_token_string) diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index cf4196ac0a..0437c87d8d 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -28,7 +28,7 @@ from typing import ( from typing_extensions import TypedDict -from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.errors import Codes, InvalidClientTokenError, LoginError, SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.api.urls import CLIENT_API_PREFIX from synapse.appservice import ApplicationService @@ -172,7 +172,13 @@ class LoginRestServlet(RestServlet): try: if login_submission["type"] == LoginRestServlet.APPSERVICE_TYPE: - appservice = self.auth.get_appservice_by_req(request) + requester = await self.auth.get_user_by_req(request) + appservice = requester.app_service + + if appservice is None: + raise InvalidClientTokenError( + "This login method is only valid for application services" + ) if appservice.is_rate_limited(): await self._address_ratelimiter.ratelimit( @@ -420,17 +426,31 @@ class LoginRestServlet(RestServlet): 403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN ) - import jwt + from authlib.jose import JsonWebToken, JWTClaims + from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError + + jwt = JsonWebToken([self.jwt_algorithm]) + claim_options = {} + if self.jwt_issuer is not None: + claim_options["iss"] = {"value": self.jwt_issuer, "essential": True} + if self.jwt_audiences is not None: + claim_options["aud"] = {"values": self.jwt_audiences, "essential": True} try: - payload = jwt.decode( + claims = jwt.decode( token, - self.jwt_secret, - algorithms=[self.jwt_algorithm], - issuer=self.jwt_issuer, - audience=self.jwt_audiences, + key=self.jwt_secret, + claims_cls=JWTClaims, + claims_options=claim_options, + ) + except BadSignatureError: + # We handle this case separately to provide a better error message + raise LoginError( + 403, + "JWT validation failed: Signature verification failed", + errcode=Codes.FORBIDDEN, ) - except jwt.PyJWTError as e: + except JoseError as e: # A JWT error occurred, return some info back to the client. raise LoginError( 403, @@ -438,7 +458,23 @@ class LoginRestServlet(RestServlet): errcode=Codes.FORBIDDEN, ) - user = payload.get(self.jwt_subject_claim, None) + try: + claims.validate(leeway=120) # allows 2 min of clock skew + + # Enforce the old behavior which is rolled out in productive + # servers: if the JWT contains an 'aud' claim but none is + # configured, the login attempt will fail + if claims.get("aud") is not None: + if self.jwt_audiences is None or len(self.jwt_audiences) == 0: + raise InvalidClaimError("aud") + except JoseError as e: + raise LoginError( + 403, + "JWT validation failed: %s" % (str(e),), + errcode=Codes.FORBIDDEN, + ) + + user = claims.get(self.jwt_subject_claim, None) if user is None: raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index 3644705e6a..8896f2df50 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -40,6 +40,10 @@ class ReadMarkerRestServlet(RestServlet): self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() + self._known_receipt_types = {ReceiptTypes.READ, ReceiptTypes.FULLY_READ} + if hs.config.experimental.msc2285_enabled: + self._known_receipt_types.add(ReceiptTypes.READ_PRIVATE) + async def on_POST( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: @@ -49,13 +53,7 @@ class ReadMarkerRestServlet(RestServlet): body = parse_json_object_from_request(request) - valid_receipt_types = { - ReceiptTypes.READ, - ReceiptTypes.FULLY_READ, - ReceiptTypes.READ_PRIVATE, - } - - unrecognized_types = set(body.keys()) - valid_receipt_types + unrecognized_types = set(body.keys()) - self._known_receipt_types if unrecognized_types: # It's fine if there are unrecognized receipt types, but let's log # it to help debug clients that have typoed the receipt type. @@ -65,31 +63,25 @@ class ReadMarkerRestServlet(RestServlet): # types. logger.info("Ignoring unrecognized receipt types: %s", unrecognized_types) - read_event_id = body.get(ReceiptTypes.READ, None) - if read_event_id: - await self.receipts_handler.received_client_receipt( - room_id, - ReceiptTypes.READ, - user_id=requester.user.to_string(), - event_id=read_event_id, - ) - - read_private_event_id = body.get(ReceiptTypes.READ_PRIVATE, None) - if read_private_event_id and self.config.experimental.msc2285_enabled: - await self.receipts_handler.received_client_receipt( - room_id, - ReceiptTypes.READ_PRIVATE, - user_id=requester.user.to_string(), - event_id=read_private_event_id, - ) - - read_marker_event_id = body.get(ReceiptTypes.FULLY_READ, None) - if read_marker_event_id: - await self.read_marker_handler.received_client_read_marker( - room_id, - user_id=requester.user.to_string(), - event_id=read_marker_event_id, - ) + for receipt_type in self._known_receipt_types: + event_id = body.get(receipt_type, None) + # TODO Add validation to reject non-string event IDs. + if not event_id: + continue + + if receipt_type == ReceiptTypes.FULLY_READ: + await self.read_marker_handler.received_client_read_marker( + room_id, + user_id=requester.user.to_string(), + event_id=event_id, + ) + else: + await self.receipts_handler.received_client_receipt( + room_id, + receipt_type, + user_id=requester.user.to_string(), + event_id=event_id, + ) return 200, {} diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index 4b03eb876b..409bfd43c1 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -39,31 +39,27 @@ class ReceiptRestServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() - self.hs = hs self.auth = hs.get_auth() self.receipts_handler = hs.get_receipts_handler() self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() + self._known_receipt_types = {ReceiptTypes.READ} + if hs.config.experimental.msc2285_enabled: + self._known_receipt_types.update( + (ReceiptTypes.READ_PRIVATE, ReceiptTypes.FULLY_READ) + ) + async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - if self.hs.config.experimental.msc2285_enabled and receipt_type not in [ - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.FULLY_READ, - ]: + if receipt_type not in self._known_receipt_types: raise SynapseError( 400, - "Receipt type must be 'm.read', 'org.matrix.msc2285.read.private' or 'm.fully_read'", + f"Receipt type must be {', '.join(self._known_receipt_types)}", ) - elif ( - not self.hs.config.experimental.msc2285_enabled - and receipt_type != ReceiptTypes.READ - ): - raise SynapseError(400, "Receipt type must be 'm.read'") parse_json_object_from_request(request, allow_empty_body=False) diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index e8e51a9c66..a8402cdb3a 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -31,7 +31,6 @@ from synapse.api.errors import ( ) from synapse.api.ratelimiting import Ratelimiter from synapse.config import ConfigError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.ratelimiting import FederationRateLimitConfig from synapse.config.server import is_threepid_reserved @@ -74,7 +73,7 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.config = hs.config - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -83,13 +82,10 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if ( - self.hs.config.email.local_threepid_handling_disabled_due_to_email_config - ): - logger.warning( - "Email registration has been disabled due to lack of email config" - ) + if not self.hs.config.email.can_verify_email: + logger.warning( + "Email registration has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration has been disabled on this server" ) @@ -138,35 +134,21 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send registration emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_registration_mail, - next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + # Send registration emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_registration_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="register").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnRegisterRequestTokenRestServlet(RestServlet): @@ -260,7 +242,7 @@ class RegistrationSubmitTokenServlet(RestServlet): self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_registration_template_failure_html ) @@ -270,11 +252,10 @@ class RegistrationSubmitTokenServlet(RestServlet): raise SynapseError( 400, "This medium is currently not supported for registration" ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User registration via email has been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User registration via email has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration is disabled on this server" ) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index a26e976492..2f513164cb 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1177,7 +1177,9 @@ class TimestampLookupRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await self._auth.check_user_in_room(room_id, requester.user.to_string()) + await self._auth.check_user_in_room_or_world_readable( + room_id, requester.user.to_string() + ) timestamp = parse_integer(request, "ts", required=True) direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py index 37e39570f6..f7081f638e 100644 --- a/synapse/rest/client/room_keys.py +++ b/synapse/rest/client/room_keys.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple, cast from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer @@ -127,7 +127,7 @@ class RoomKeysServlet(RestServlet): requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() body = parse_json_object_from_request(request) - version = parse_string(request, "version") + version = parse_string(request, "version", required=True) if session_id: body = {"sessions": {session_id: body}} @@ -196,8 +196,11 @@ class RoomKeysServlet(RestServlet): user_id = requester.user.to_string() version = parse_string(request, "version", required=True) - room_keys = await self.e2e_room_keys_handler.get_room_keys( - user_id, version, room_id, session_id + room_keys = cast( + JsonDict, + await self.e2e_room_keys_handler.get_room_keys( + user_id, version, room_id, session_id + ), ) # Convert room_keys to the right format to return. @@ -240,7 +243,7 @@ class RoomKeysServlet(RestServlet): requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() - version = parse_string(request, "version") + version = parse_string(request, "version", required=True) ret = await self.e2e_room_keys_handler.delete_room_keys( user_id, version, room_id, session_id diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py index 3322c8ef48..1a8e9a96d4 100644 --- a/synapse/rest/client/sendtodevice.py +++ b/synapse/rest/client/sendtodevice.py @@ -19,7 +19,7 @@ from synapse.http import servlet from synapse.http.server import HttpServer from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import set_tag, trace +from synapse.logging.opentracing import set_tag, trace_with_opname from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict @@ -43,7 +43,7 @@ class SendToDeviceRestServlet(servlet.RestServlet): self.txns = HttpTransactionCache(hs) self.device_message_handler = hs.get_device_message_handler() - @trace(opname="sendToDevice") + @trace_with_opname("sendToDevice") def on_PUT( self, request: SynapseRequest, message_type: str, txn_id: str ) -> Awaitable[Tuple[int, JsonDict]]: diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 8bbf35148d..c2989765ce 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -37,7 +37,7 @@ from synapse.handlers.sync import ( from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import trace_with_opname from synapse.types import JsonDict, StreamToken from synapse.util import json_decoder @@ -210,7 +210,7 @@ class SyncRestServlet(RestServlet): logger.debug("Event formatting complete") return 200, response_content - @trace(opname="sync.encode_response") + @trace_with_opname("sync.encode_response") async def encode_response( self, time_now: int, @@ -315,7 +315,7 @@ class SyncRestServlet(RestServlet): ] } - @trace(opname="sync.encode_joined") + @trace_with_opname("sync.encode_joined") async def encode_joined( self, rooms: List[JoinedSyncResult], @@ -340,7 +340,7 @@ class SyncRestServlet(RestServlet): return joined - @trace(opname="sync.encode_invited") + @trace_with_opname("sync.encode_invited") async def encode_invited( self, rooms: List[InvitedSyncResult], @@ -371,7 +371,7 @@ class SyncRestServlet(RestServlet): return invited - @trace(opname="sync.encode_knocked") + @trace_with_opname("sync.encode_knocked") async def encode_knocked( self, rooms: List[KnockedSyncResult], @@ -420,7 +420,7 @@ class SyncRestServlet(RestServlet): return knocked - @trace(opname="sync.encode_archived") + @trace_with_opname("sync.encode_archived") async def encode_archived( self, rooms: List[ArchivedSyncResult], diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c1bd775fec..f4f06563dd 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,6 +95,8 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285": self.config.experimental.msc2285_enabled, + # Supports filtering of /publicRooms by room type MSC3827 + "org.matrix.msc3827": self.config.experimental.msc3827_enabled, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6180fa575e..048a042692 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -15,7 +15,11 @@ import logging from typing import TYPE_CHECKING -from synapse.http.server import DirectServeJsonResource, set_cors_headers +from synapse.http.server import ( + DirectServeJsonResource, + set_corp_headers, + set_cors_headers, +) from synapse.http.servlet import parse_boolean from synapse.http.site import SynapseRequest @@ -38,6 +42,7 @@ class DownloadResource(DirectServeJsonResource): async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) + set_corp_headers(request) request.setHeader( b"Content-Security-Policy", b"sandbox;" diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 9137417342..a5c3de192f 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -154,7 +154,9 @@ class MediaStorage: # Note that we'll delete the stored media, due to the # try/except below. The media also won't be stored in # the DB. - raise SpamMediaException(errcode=spam_check) + # We currently ignore any additional field returned by + # the spam-check API. + raise SpamMediaException(errcode=spam_check[0]) for provider in self.storage_providers: await provider.store_file(path, file_info) diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ed8f21a483..516d0434f0 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -12,10 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. import codecs -import itertools import logging import re -from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Union +from typing import ( + TYPE_CHECKING, + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Set, + Union, +) if TYPE_CHECKING: from lxml import etree @@ -146,6 +155,70 @@ def decode_body( return etree.fromstring(body, parser) +def _get_meta_tags( + tree: "etree.Element", + property: str, + prefix: str, + property_mapper: Optional[Callable[[str], Optional[str]]] = None, +) -> Dict[str, Optional[str]]: + """ + Search for meta tags prefixed with a particular string. + + Args: + tree: The parsed HTML document. + property: The name of the property which contains the tag name, e.g. + "property" for Open Graph. + prefix: The prefix on the property to search for, e.g. "og" for Open Graph. + property_mapper: An optional callable to map the property to the Open Graph + form. Can return None for a key to ignore that key. + + Returns: + A map of tag name to value. + """ + results: Dict[str, Optional[str]] = {} + for tag in tree.xpath( + f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]" + ): + # if we've got more than 50 tags, someone is taking the piss + if len(results) >= 50: + logger.warning( + "Skipping parsing of Open Graph for page with too many '%s:' tags", + prefix, + ) + return {} + + key = tag.attrib[property] + if property_mapper: + key = property_mapper(key) + # None is a special value used to ignore a value. + if key is None: + continue + + results[key] = tag.attrib["content"] + + return results + + +def _map_twitter_to_open_graph(key: str) -> Optional[str]: + """ + Map a Twitter card property to the analogous Open Graph property. + + Args: + key: The Twitter card property (starts with "twitter:"). + + Returns: + The Open Graph property (starts with "og:") or None to have this property + be ignored. + """ + # Twitter card properties with no analogous Open Graph property. + if key == "twitter:card" or key == "twitter:creator": + return None + if key == "twitter:site": + return "og:site_name" + # Otherwise, swap twitter to og. + return "og" + key[7:] + + def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: """ Parse the HTML document into an Open Graph response. @@ -160,10 +233,8 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: The Open Graph response as a dictionary. """ - # if we see any image URLs in the OG response, then spider them - # (although the client could choose to do this by asking for previews of those - # URLs to avoid DoSing the server) - + # Search for Open Graph (og:) meta tags, e.g.: + # # "og:type" : "video", # "og:url" : "https://www.youtube.com/watch?v=LXDBoHyjmtw", # "og:site_name" : "YouTube", @@ -176,19 +247,11 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "og:video:height" : "720", # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", - og: Dict[str, Optional[str]] = {} - for tag in tree.xpath( - "//*/meta[starts-with(@property, 'og:')][@content][not(@content='')]" - ): - # if we've got more than 50 tags, someone is taking the piss - if len(og) >= 50: - logger.warning("Skipping OG for page with too many 'og:' tags") - return {} - - og[tag.attrib["property"]] = tag.attrib["content"] - - # TODO: grab article: meta tags too, e.g.: + og = _get_meta_tags(tree, "property", "og") + # TODO: Search for properties specific to the different Open Graph types, + # such as article: meta tags, e.g.: + # # "article:publisher" : "https://www.facebook.com/thethudonline" /> # "article:author" content="https://www.facebook.com/thethudonline" /> # "article:tag" content="baby" /> @@ -196,6 +259,21 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "article:published_time" content="2016-03-31T19:58:24+00:00" /> # "article:modified_time" content="2016-04-01T18:31:53+00:00" /> + # Search for Twitter Card (twitter:) meta tags, e.g.: + # + # "twitter:site" : "@matrixdotorg" + # "twitter:creator" : "@matrixdotorg" + # + # Twitter cards tags also duplicate Open Graph tags. + # + # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started + twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph) + # Merge the Twitter values with the Open Graph values, but do not overwrite + # information from Open Graph tags. + for key, value in twitter.items(): + if key not in og: + og[key] = value + if "og:title" not in og: # Attempt to find a title from the title tag, or the biggest header on the page. title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()") @@ -276,7 +354,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: from lxml import etree - TAGS_TO_REMOVE = ( + TAGS_TO_REMOVE = { "header", "nav", "aside", @@ -291,31 +369,42 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: "img", "picture", etree.Comment, - ) + } # Split all the text nodes into paragraphs (by splitting on new # lines) text_nodes = ( re.sub(r"\s+", "\n", el).strip() - for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) + for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE) ) return summarize_paragraphs(text_nodes) def _iterate_over_text( - tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] + tree: Optional["etree.Element"], + tags_to_ignore: Set[Union[str, "etree.Comment"]], + stack_limit: int = 1024, ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. + + Args: + tree: The parent element to iterate. Can be None if there isn't one. + tags_to_ignore: Set of tags to ignore + stack_limit: Maximum stack size limit for depth-first traversal. + Nodes will be dropped if this limit is hit, which may truncate the + textual result. + Intended to limit the maximum working memory when generating a preview. """ - # This is basically a stack that we extend using itertools.chain. - # This will either consist of an element to iterate over *or* a string + + if tree is None: + return + + # This is a stack whose items are elements to iterate over *or* strings # to be returned. - elements = iter([tree]) - while True: - el = next(elements, None) - if el is None: - return + elements: List[Union[str, "etree.Element"]] = [tree] + while elements: + el = elements.pop() if isinstance(el, str): yield el @@ -329,17 +418,22 @@ def _iterate_over_text( if el.text: yield el.text - # We add to the stack all the elements children, interspersed with - # each child's tail text (if it exists). The tail text of a node - # is text that comes *after* the node, so we always include it even - # if we ignore the child node. - elements = itertools.chain( - itertools.chain.from_iterable( # Basically a flatmap - [child, child.tail] if child.tail else [child] - for child in el.iterchildren() - ), - elements, - ) + # We add to the stack all the element's children, interspersed with + # each child's tail text (if it exists). + # + # We iterate in reverse order so that earlier pieces of text appear + # closer to the top of the stack. + for child in el.iterchildren(reversed=True): + if len(elements) > stack_limit: + # We've hit our limit for working memory + break + + if child.tail: + # The tail text of a node is text that comes *after* the node, + # so we always include it even if we ignore the child node. + elements.append(child.tail) + + elements.append(child) def summarize_paragraphs( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 54a849eac9..b36c98a08e 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -109,10 +109,64 @@ class MediaInfo: class PreviewUrlResource(DirectServeJsonResource): """ - Generating URL previews is a complicated task which many potential pitfalls. - - See docs/development/url_previews.md for discussion of the design and - algorithm followed in this module. + The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API + for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix + specific additions). + + This does have trade-offs compared to other designs: + + * Pros: + * Simple and flexible; can be used by any clients at any point + * Cons: + * If each homeserver provides one of these independently, all the homeservers in a + room may needlessly DoS the target URI + * The URL metadata must be stored somewhere, rather than just using Matrix + itself to store the media. + * Matrix cannot be used to distribute the metadata between homeservers. + + When Synapse is asked to preview a URL it does the following: + + 1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the + config). + 2. Checks the URL against an in-memory cache and returns the result if it exists. (This + is also used to de-duplicate processing of multiple in-flight requests at once.) + 3. Kicks off a background process to generate a preview: + 1. Checks URL and timestamp against the database cache and returns the result if it + has not expired and was successful (a 2xx return code). + 2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it + does, update the URL to download. + 3. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 4. If the media is an image: + 1. Generates thumbnails. + 2. Generates an Open Graph response based on image properties. + 5. If the media is HTML: + 1. Decodes the HTML via the stored file. + 2. Generates an Open Graph response from the HTML. + 3. If a JSON oEmbed URL was found in the HTML via autodiscovery: + 1. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 2. Convert the oEmbed response to an Open Graph response. + 3. Override any Open Graph data from the HTML with data from oEmbed. + 4. If an image exists in the Open Graph response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 6. If the media is JSON and an oEmbed URL was found: + 1. Convert the oEmbed response to an Open Graph response. + 2. If a thumbnail or image is in the oEmbed response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 7. Stores the result in the database cache. + 4. Returns the result. + + The in-memory cache expires after 1 hour. + + Expired entries in the database cache (and their associated media files) are + deleted every 10 seconds. The default expiration time is 1 hour from download. """ isLeaf = True diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 53b1565243..5f725c7600 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -17,8 +17,14 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from synapse.api.errors import SynapseError -from synapse.http.server import DirectServeJsonResource, set_cors_headers +from synapse.api.errors import Codes, SynapseError, cs_error +from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP +from synapse.http.server import ( + DirectServeJsonResource, + respond_with_json, + set_corp_headers, + set_cors_headers, +) from synapse.http.servlet import parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.media.v1.media_storage import MediaStorage @@ -58,6 +64,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) + set_corp_headers(request) server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width", required=True) height = parse_integer(request, "height", required=True) @@ -304,6 +311,19 @@ class ThumbnailResource(DirectServeJsonResource): url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. """ + logger.debug( + "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s", + media_id, + desired_width, + desired_height, + desired_method, + thumbnail_infos, + ) + + # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a + # different code path to handle it. + assert not self.dynamic_thumbnails + if thumbnail_infos: file_info = self._select_thumbnail( desired_width, @@ -379,8 +399,29 @@ class ThumbnailResource(DirectServeJsonResource): file_info.thumbnail.length, ) else: + # This might be because: + # 1. We can't create thumbnails for the given media (corrupted or + # unsupported file type), or + # 2. The thumbnailing process never ran or errored out initially + # when the media was first uploaded (these bugs should be + # reported and fixed). + # Note that we don't attempt to generate a thumbnail now because + # `dynamic_thumbnails` is disabled. logger.info("Failed to find any generated thumbnails") - respond_404(request) + + respond_with_json( + request, + 400, + cs_error( + "Cannot find any thumbnails for the requested media (%r). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)" + % ( + request.postpath, + ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()), + ), + code=Codes.UNKNOWN, + ), + send_cors=True, + ) def _select_thumbnail( self, diff --git a/synapse/rest/synapse/client/password_reset.py b/synapse/rest/synapse/client/password_reset.py index 6ac9dbc7c9..b9402cfb75 100644 --- a/synapse/rest/synapse/client/password_reset.py +++ b/synapse/rest/synapse/client/password_reset.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request from synapse.api.errors import ThreepidValidationError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import DirectServeHtmlResource from synapse.http.servlet import parse_string from synapse.util.stringutils import assert_valid_client_secret @@ -46,9 +45,6 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): self.clock = hs.get_clock() self.store = hs.get_datastores().main - self._local_threepid_handling_disabled_due_to_email_config = ( - hs.config.email.local_threepid_handling_disabled_due_to_email_config - ) self._confirmation_email_template = ( hs.config.email.email_password_reset_template_confirmation_html ) @@ -59,8 +55,8 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): hs.config.email.email_password_reset_template_failure_html ) - # This resource should not be mounted if threepid behaviour is not LOCAL - assert hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL + # This resource should only be mounted if email validation is enabled + assert hs.config.email.can_verify_email async def _async_render_GET(self, request: Request) -> Tuple[int, bytes]: sid = parse_string(request, "sid", required=True) diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py index 04b035a1b1..6f7ac54c65 100644 --- a/synapse/rest/well_known.py +++ b/synapse/rest/well_known.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging from typing import TYPE_CHECKING, Optional @@ -44,6 +43,14 @@ class WellKnownBuilder: "base_url": self._config.registration.default_identity_server } + if self._config.server.extra_well_known_client_content: + for ( + key, + value, + ) in self._config.server.extra_well_known_client_content.items(): + if key not in result: + result[key] = value + return result diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9d3fe66100..87ccd52f0a 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq import logging -from collections import defaultdict +from collections import ChainMap, defaultdict from typing import ( TYPE_CHECKING, Any, @@ -24,14 +24,12 @@ from typing import ( DefaultDict, Dict, FrozenSet, - Iterable, List, Mapping, Optional, Sequence, Set, Tuple, - Union, ) import attr @@ -43,9 +41,11 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersio from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.logging.context import ContextResourceUsage +from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.roommember import ProfileInfo +from synapse.storage.state import StateFilter from synapse.types import StateMap from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -53,6 +53,7 @@ from synapse.util.metrics import Measure, measure_func if TYPE_CHECKING: from synapse.server import HomeServer + from synapse.storage.controllers import StateStorageController from synapse.storage.databases.main import DataStore logger = logging.getLogger(__name__) @@ -82,17 +83,26 @@ def _gen_state_id() -> str: class _StateCacheEntry: - __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] + __slots__ = ["_state", "state_group", "prev_group", "delta_ids"] def __init__( self, - state: StateMap[str], + state: Optional[StateMap[str]], state_group: Optional[int], prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ): + if state is None and state_group is None and prev_group is None: + raise Exception("One of state, state_group or prev_group must be not None") + + if prev_group is not None and delta_ids is None: + raise Exception("If prev_group is set so must delta_ids") + # A map from (type, state_key) to event_id. - self.state = frozendict(state) + # + # This can be None if we have a `state_group` (as then we can fetch the + # state from the DB.) + self._state = frozendict(state) if state is not None else None # the ID of a state group if one and only one is involved. # otherwise, None otherwise? @@ -101,20 +111,60 @@ class _StateCacheEntry: self.prev_group = prev_group self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None - # The `state_id` is a unique ID we generate that can be used as ID for - # this collection of state. Usually this would be the same as the - # state group, but on worker instances we can't generate a new state - # group each time we resolve state, so we generate a separate one that - # isn't persisted and is used solely for caches. - # `state_id` is either a state_group (and so an int) or a string. This - # ensures we don't accidentally persist a state_id as a stateg_group - if state_group: - self.state_id: Union[str, int] = state_group - else: - self.state_id = _gen_state_id() + async def get_state( + self, + state_storage: "StateStorageController", + state_filter: Optional["StateFilter"] = None, + ) -> StateMap[str]: + """Get the state map for this entry, either from the in-memory state or + looking up the state group in the DB. + """ + + if self._state is not None: + return self._state + + if self.state_group is not None: + return await state_storage.get_state_ids_for_group( + self.state_group, state_filter + ) + + assert self.prev_group is not None and self.delta_ids is not None + + prev_state = await state_storage.get_state_ids_for_group( + self.prev_group, state_filter + ) + + # ChainMap expects MutableMapping, but since we're using it immutably + # its safe to give it immutable maps. + return ChainMap(self.delta_ids, prev_state) # type: ignore[arg-type] + + def set_state_group(self, state_group: int) -> None: + """Update the state group assigned to this state (e.g. after we've + persisted it). + + Note: this will cause the cache entry to drop any stored state. + """ + + self.state_group = state_group + + # We clear out the state as we know longer need to explicitly keep it in + # the `state_cache` (as the store state group cache will do that). + self._state = None def __len__(self) -> int: - return len(self.state) + # The len should be used to estimate how large this cache entry is, for + # cache eviction purposes. This is why it's fine to return 1 if we're + # not storing any state. + + length = 0 + + if self._state: + length += len(self._state) + + if self.delta_ids: + length += len(self.delta_ids) + + return length or 1 # Make sure its not 0. class StateHandler: @@ -129,24 +179,36 @@ class StateHandler: self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() self._storage_controllers = hs.get_storage_controllers() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + self._update_current_state_client = ( + ReplicationUpdateCurrentStateRestServlet.make_client(hs) + ) - async def get_current_state_ids( + async def compute_state_after_events( self, room_id: str, - latest_event_ids: Collection[str], + event_ids: Collection[str], + state_filter: Optional[StateFilter] = None, ) -> StateMap[str]: - """Get the current state, or the state at a set of events, for a room + """Fetch the state after each of the given event IDs. Resolve them and return. + + This is typically used where `event_ids` is a collection of forward extremities + in a room, intended to become the `prev_events` of a new event E. If so, the + return value of this function represents the state before E. Args: - room_id: - latest_event_ids: The forward extremities to resolve. + room_id: the room_id containing the given events. + event_ids: the events whose state should be fetched and resolved. Returns: - the state dict, mapping from (event_type, state_key) -> event_id + the state dict (a mapping from (event_type, state_key) -> event_id) which + holds the resolution of the states after the given event IDs. """ - logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = await self.resolve_state_groups_for_events(room_id, latest_event_ids) - return ret.state + logger.debug("calling resolve_state_groups from compute_state_after_events") + ret = await self.resolve_state_groups_for_events(room_id, event_ids) + return await ret.get_state(self._state_storage_controller, state_filter) async def get_current_users_in_room( self, room_id: str, latest_event_ids: List[str] @@ -170,7 +232,8 @@ class StateHandler: logger.debug("calling resolve_state_groups from get_current_users_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) - return await self.store.get_joined_users_from_state(room_id, entry) + state = await entry.get_state(self._state_storage_controller, StateFilter.all()) + return await self.store.get_joined_users_from_state(room_id, state, entry) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] @@ -185,7 +248,8 @@ class StateHandler: The hosts in the room at the given events """ entry = await self.resolve_state_groups_for_events(room_id, event_ids) - return await self.store.get_joined_hosts(room_id, entry) + state = await entry.get_state(self._state_storage_controller, StateFilter.all()) + return await self.store.get_joined_hosts(room_id, state, entry) async def compute_event_context( self, @@ -220,10 +284,19 @@ class StateHandler: # if state_ids_before_event: # if we're given the state before the event, then we use that - state_group_before_event = None state_group_before_event_prev_group = None deltas_to_state_group_before_event = None - entry = None + + # .. though we need to get a state group for it. + state_group_before_event = ( + await self._state_storage_controller.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=state_ids_before_event, + ) + ) else: # otherwise, we'll need to resolve the state across the prev_events. @@ -249,40 +322,40 @@ class StateHandler: partial_state = True logger.debug("calling resolve_state_groups from compute_event_context") + # we've already taken into account partial state, so no need to wait for + # complete state here. entry = await self.resolve_state_groups_for_events( - event.room_id, event.prev_event_ids() + event.room_id, + event.prev_event_ids(), + await_full_state=False, ) - state_ids_before_event = entry.state - state_group_before_event = entry.state_group state_group_before_event_prev_group = entry.prev_group deltas_to_state_group_before_event = entry.delta_ids + state_ids_before_event = None + + # We make sure that we have a state group assigned to the state. + if entry.state_group is None: + # store_state_group requires us to have either a previous state group + # (with deltas) or the complete state map. So, if we don't have a + # previous state group, load the complete state map now. + if state_group_before_event_prev_group is None: + state_ids_before_event = await entry.get_state( + self._state_storage_controller, StateFilter.all() + ) - # - # make sure that we have a state group at that point. If it's not a state event, - # that will be the state group for the new event. If it *is* a state event, - # it might get rejected (in which case we'll need to persist it with the - # previous state group) - # - - if not state_group_before_event: - state_group_before_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, - current_state_ids=state_ids_before_event, + state_group_before_event = ( + await self._state_storage_controller.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + current_state_ids=state_ids_before_event, + ) ) - ) - - # Assign the new state group to the cached state entry. - # - # Note that this can race in that we could generate multiple state - # groups for the same state entry, but that is just inefficient - # rather than dangerous. - if entry and entry.state_group is None: - entry.state_group = state_group_before_event + entry.set_state_group(state_group_before_event) + else: + state_group_before_event = entry.state_group # # now if it's not a state event, we're done @@ -304,13 +377,18 @@ class StateHandler: # key = (event.type, event.state_key) - if key in state_ids_before_event: - replaces = state_ids_before_event[key] - if replaces != event.event_id: - event.unsigned["replaces_state"] = replaces - state_ids_after_event = dict(state_ids_before_event) - state_ids_after_event[key] = event.event_id + if state_ids_before_event is not None: + replaces = state_ids_before_event.get(key) + else: + replaces_state_map = await entry.get_state( + self._state_storage_controller, StateFilter.from_types([key]) + ) + replaces = replaces_state_map.get(key) + + if replaces and replaces != event.event_id: + event.unsigned["replaces_state"] = replaces + delta_ids = {key: event.event_id} state_group_after_event = ( @@ -319,7 +397,7 @@ class StateHandler: event.room_id, prev_group=state_group_before_event, delta_ids=delta_ids, - current_state_ids=state_ids_after_event, + current_state_ids=None, ) ) @@ -335,7 +413,7 @@ class StateHandler: @measure_func() async def resolve_state_groups_for_events( - self, room_id: str, event_ids: Collection[str] + self, room_id: str, event_ids: Collection[str], await_full_state: bool = True ) -> _StateCacheEntry: """Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -343,6 +421,8 @@ class StateHandler: Args: room_id event_ids + await_full_state: if true, will block if we do not yet have complete + state at these events. Returns: The resolved state @@ -350,7 +430,7 @@ class StateHandler: logger.debug("resolve_state_groups event_ids %s", event_ids) state_groups = await self._state_storage_controller.get_state_group_for_events( - event_ids + event_ids, await_full_state=await_full_state ) state_group_ids = state_groups.values() @@ -359,9 +439,6 @@ class StateHandler: state_group_ids_set = set(state_group_ids) if len(state_group_ids_set) == 1: (state_group_id,) = state_group_ids_set - state = await self._state_storage_controller.get_state_for_groups( - state_group_ids_set - ) ( prev_group, delta_ids, @@ -369,7 +446,7 @@ class StateHandler: state_group_id ) return _StateCacheEntry( - state=state[state_group_id], + state=None, state_group=state_group_id, prev_group=prev_group, delta_ids=delta_ids, @@ -392,30 +469,23 @@ class StateHandler: ) return result - async def resolve_events( - self, - room_version: str, - state_sets: Collection[Iterable[EventBase]], - event: EventBase, - ) -> StateMap[EventBase]: - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [ - {(ev.type, ev.state_key): ev.event_id for ev in st} for st in state_sets - ] + async def update_current_state(self, room_id: str) -> None: + """Recalculates the current state for a room, and persists it. - state_map = {ev.event_id: ev for st in state_sets for ev in st} - - new_state = await self._state_resolution_handler.resolve_events_with_store( - event.room_id, - room_version, - state_set_ids, - event_map=state_map, - state_res_store=StateResolutionStore(self.store), - ) + Raises: + SynapseError(502): if all attempts to connect to the event persister worker + fail + """ + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + await self._update_current_state_client( + instance_name=writer_instance, + room_id=room_id, + ) + return - return {key: state_map[ev_id] for key, ev_id in new_state.items()} + assert self._storage_controllers.persistence is not None + await self._storage_controllers.persistence.update_current_state(room_id) @attr.s(slots=True, auto_attribs=True) @@ -710,7 +780,7 @@ def _make_state_cache_entry( old_state_event_ids = set(state.values()) if new_state_event_ids == old_state_event_ids: # got an exact match. - return _StateCacheEntry(state=new_state, state_group=sg) + return _StateCacheEntry(state=None, state_group=sg) # TODO: We want to create a state group for this set of events, to # increase cache hits, but we need to make sure that it doesn't @@ -721,14 +791,25 @@ def _make_state_cache_entry( delta_ids: Optional[StateMap[str]] = None for old_group, old_state in state_groups_ids.items(): + if old_state.keys() - new_state.keys(): + # Currently we don't support deltas that remove keys from the state + # map, so we have to ignore this group as a candidate to base the + # new group on. + continue + n_delta_ids = {k: v for k, v in new_state.items() if old_state.get(k) != v} if not delta_ids or len(n_delta_ids) < len(delta_ids): prev_group = old_group delta_ids = n_delta_ids - return _StateCacheEntry( - state=new_state, state_group=None, prev_group=prev_group, delta_ids=delta_ids - ) + if prev_group is not None: + # If we have a prev group and deltas then we can drop the new state from + # the cache (to reduce memory usage). + return _StateCacheEntry( + state=None, state_group=None, prev_group=prev_group, delta_ids=delta_ids + ) + else: + return _StateCacheEntry(state=new_state, state_group=None) @attr.s(slots=True, auto_attribs=True) diff --git a/synapse/state/v1.py b/synapse/state/v1.py index 8bbb4ce41c..500e384695 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -330,7 +330,7 @@ def _resolve_auth_events( auth_events[(prev_event.type, prev_event.state_key)] = prev_event try: # The signatures have already been checked at this point - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) @@ -347,7 +347,7 @@ def _resolve_normal_events( for event in _ordered_events(events): try: # The signatures have already been checked at this point - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) diff --git a/synapse/state/v2.py b/synapse/state/v2.py index 6a16f38a15..7db032203b 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -573,7 +573,7 @@ async def _iterative_auth_checks( auth_events[key] = event_map[ev_id] try: - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abfc56b061..e30f9c76d4 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -75,6 +75,19 @@ class SQLBaseStore(metaclass=ABCMeta): self._attempt_to_invalidate_cache( "get_users_in_room_with_profiles", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_number_joined_users_in_room", (room_id,) + ) + self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) + + # There's no easy way of invalidating this cache for just the users + # that have changed, so we just clear the entire thing. + self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None) + + for user_id in members_changed: + self._attempt_to_invalidate_cache( + "get_user_in_room_with_profile", (room_id, user_id) + ) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) @@ -87,6 +100,10 @@ class SQLBaseStore(metaclass=ABCMeta): cache doesn't exist. Mainly used for invalidating caches on workers, where they may not have the cache. + Note that this function does not invalidate any remote caches, only the + local in-memory ones. Any remote invalidation must be performed before + calling this. + Args: cache_name key: Entry to invalidate. If None then invalidates the entire @@ -103,7 +120,10 @@ class SQLBaseStore(metaclass=ABCMeta): if key is None: cache.invalidate_all() else: - cache.invalidate(tuple(key)) + # Prefer any local-only invalidation method. Invalidating any non-local + # cache must be be done before this. + invalidate_method = getattr(cache, "invalidate_local", cache.invalidate) + invalidate_method(tuple(key)) def db_to_json(db_content: Union[memoryview, bytes, bytearray, str]) -> Any: diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py index 55649719f6..45101cda7a 100644 --- a/synapse/storage/controllers/__init__.py +++ b/synapse/storage/controllers/__init__.py @@ -43,4 +43,6 @@ class StorageControllers: self.persistence = None if stores.persist_events: - self.persistence = EventsPersistenceStorageController(hs, stores) + self.persistence = EventsPersistenceStorageController( + hs, stores, self.state + ) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 4bcb99d06e..cf98b0ab48 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -22,6 +22,7 @@ from typing import ( Any, Awaitable, Callable, + ClassVar, Collection, Deque, Dict, @@ -33,6 +34,7 @@ from typing import ( Set, Tuple, TypeVar, + Union, ) import attr @@ -46,9 +48,11 @@ from synapse.events.snapshot import EventContext from synapse.logging import opentracing from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -111,9 +115,43 @@ times_pruned_extremities = Counter( @attr.s(auto_attribs=True, slots=True) -class _EventPersistQueueItem: +class _PersistEventsTask: + """A batch of events to persist.""" + + name: ClassVar[str] = "persist_event_batch" # used for opentracing + events_and_contexts: List[Tuple[EventBase, EventContext]] backfilled: bool + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Batches events with the same backfilled option together.""" + if ( + not isinstance(task, _PersistEventsTask) + or self.backfilled != task.backfilled + ): + return False + + self.events_and_contexts.extend(task.events_and_contexts) + return True + + +@attr.s(auto_attribs=True, slots=True) +class _UpdateCurrentStateTask: + """A room whose current state needs recalculating.""" + + name: ClassVar[str] = "update_current_state" # used for opentracing + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Deduplicates consecutive recalculations of current state.""" + return isinstance(task, _UpdateCurrentStateTask) + + +_EventPersistQueueTask = Union[_PersistEventsTask, _UpdateCurrentStateTask] + + +@attr.s(auto_attribs=True, slots=True) +class _EventPersistQueueItem: + task: _EventPersistQueueTask deferred: ObservableDeferred parent_opentracing_span_contexts: List = attr.ib(factory=list) @@ -127,14 +165,16 @@ _PersistResult = TypeVar("_PersistResult") class _EventPeristenceQueue(Generic[_PersistResult]): - """Queues up events so that they can be persisted in bulk with only one - concurrent transaction per room. + """Queues up tasks so that they can be processed with only one concurrent + transaction per room. + + Tasks can be bulk persistence of events or recalculation of a room's current state. """ def __init__( self, per_item_callback: Callable[ - [List[Tuple[EventBase, EventContext]], bool], + [str, _EventPersistQueueTask], Awaitable[_PersistResult], ], ): @@ -150,18 +190,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]): async def add_to_queue( self, room_id: str, - events_and_contexts: Iterable[Tuple[EventBase, EventContext]], - backfilled: bool, + task: _EventPersistQueueTask, ) -> _PersistResult: - """Add events to the queue, with the given persist_event options. + """Add a task to the queue. - If we are not already processing events in this room, starts off a background + If we are not already processing tasks in this room, starts off a background process to to so, calling the per_item_callback for each item. Args: room_id (str): - events_and_contexts (list[(EventBase, EventContext)]): - backfilled (bool): + task (_EventPersistQueueTask): A _PersistEventsTask or + _UpdateCurrentStateTask to process. Returns: the result returned by the `_per_item_callback` passed to @@ -169,26 +208,20 @@ class _EventPeristenceQueue(Generic[_PersistResult]): """ queue = self._event_persist_queues.setdefault(room_id, deque()) - # if the last item in the queue has the same `backfilled` setting, - # we can just add these new events to that item. - if queue and queue[-1].backfilled == backfilled: + if queue and queue[-1].task.try_merge(task): + # the new task has been merged into the last task in the queue end_item = queue[-1] else: - # need to make a new queue item deferred: ObservableDeferred[_PersistResult] = ObservableDeferred( defer.Deferred(), consumeErrors=True ) end_item = _EventPersistQueueItem( - events_and_contexts=[], - backfilled=backfilled, + task=task, deferred=deferred, ) queue.append(end_item) - # add our events to the queue item - end_item.events_and_contexts.extend(events_and_contexts) - # also add our active opentracing span to the item so that we get a link back span = opentracing.active_span() if span: @@ -202,7 +235,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): # add another opentracing span which links to the persist trace. with opentracing.start_active_span_follows_from( - "persist_event_batch_complete", (end_item.opentracing_span_context,) + f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -234,16 +267,14 @@ class _EventPeristenceQueue(Generic[_PersistResult]): for item in queue: try: with opentracing.start_active_span_follows_from( - "persist_event_batch", + item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, ) as scope: if scope: item.opentracing_span_context = scope.span.context - ret = await self._per_item_callback( - item.events_and_contexts, item.backfilled - ) + ret = await self._per_item_callback(room_id, item.task) except Exception: with PreserveLoggingContext(): item.deferred.errback() @@ -279,7 +310,12 @@ class EventsPersistenceStorageController: current state and forward extremity changes. """ - def __init__(self, hs: "HomeServer", stores: Databases): + def __init__( + self, + hs: "HomeServer", + stores: Databases, + state_controller: StateStorageController, + ): # We ultimately want to split out the state store from the main store, # so we use separate variables here even though they point to the same # store for now. @@ -292,8 +328,32 @@ class EventsPersistenceStorageController: self._clock = hs.get_clock() self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id - self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch) + self._event_persist_queue = _EventPeristenceQueue( + self._process_event_persist_queue_task + ) self._state_resolution_handler = hs.get_state_resolution_handler() + self._state_controller = state_controller + + async def _process_event_persist_queue_task( + self, + room_id: str, + task: _EventPersistQueueTask, + ) -> Dict[str, str]: + """Callback for the _event_persist_queue + + Returns: + A dictionary of event ID to event ID we didn't persist as we already + had another event persisted with the same TXN ID. + """ + if isinstance(task, _PersistEventsTask): + return await self._persist_event_batch(room_id, task) + elif isinstance(task, _UpdateCurrentStateTask): + await self._update_current_state(room_id, task) + return {} + else: + raise AssertionError( + f"Found an unexpected task type in event persistence queue: {task}" + ) @opentracing.trace async def persist_events( @@ -315,6 +375,10 @@ class EventsPersistenceStorageController: if they were deduplicated due to an event already existing that matched the transaction ID; the existing event is returned in such a case. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: @@ -325,7 +389,8 @@ class EventsPersistenceStorageController: ) -> Dict[str, str]: room_id, evs_ctxs = item return await self._event_persist_queue.add_to_queue( - room_id, evs_ctxs, backfilled=backfilled + room_id, + _PersistEventsTask(events_and_contexts=evs_ctxs, backfilled=backfilled), ) ret_vals = await yieldable_gather_results(enqueue, partitioned.items()) @@ -363,12 +428,19 @@ class EventsPersistenceStorageController: latest persisted event. The returned event may not match the given event if it was deduplicated due to an existing event matching the transaction ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # add_to_queue returns a map from event ID to existing event ID if the # event was deduplicated. (The dict may also include other entries if # the event was persisted in a batch with other events.) replaced_events = await self._event_persist_queue.add_to_queue( - event.room_id, [(event, context)], backfilled=backfilled + event.room_id, + _PersistEventsTask( + events_and_contexts=[(event, context)], backfilled=backfilled + ), ) replaced_event = replaced_events.get(event.event_id) if replaced_event: @@ -383,20 +455,22 @@ class EventsPersistenceStorageController: async def update_current_state(self, room_id: str) -> None: """Recalculate the current state for a room, and persist it""" + await self._event_persist_queue.add_to_queue( + room_id, + _UpdateCurrentStateTask(), + ) + + async def _update_current_state( + self, room_id: str, _task: _UpdateCurrentStateTask + ) -> None: + """Callback for the _event_persist_queue + + Recalculates the current state for a room, and persists it. + """ state = await self._calculate_current_state(room_id) delta = await self._calculate_state_delta(room_id, state) - # TODO(faster_joins): get a real stream ordering, to make this work correctly - # across workers. - # https://github.com/matrix-org/synapse/issues/12994 - # - # TODO(faster_joins): this can race against event persistence, in which case we - # will end up with incorrect state. Perhaps we should make this a job we - # farm out to the event persister thread, somehow. - # https://github.com/matrix-org/synapse/issues/13007 - # - stream_id = self.main_store.get_room_max_stream_ordering() - await self.persist_events_store.update_current_state(room_id, delta, stream_id) + await self.persist_events_store.update_current_state(room_id, delta) async def _calculate_current_state(self, room_id: str) -> StateMap[str]: """Calculate the current state of a room, based on the forward extremities @@ -438,12 +512,10 @@ class EventsPersistenceStorageController: state_res_store=StateResolutionStore(self.main_store), ) - return res.state + return await res.get_state(self._state_controller, StateFilter.all()) async def _persist_event_batch( - self, - events_and_contexts: List[Tuple[EventBase, EventContext]], - backfilled: bool = False, + self, _room_id: str, task: _PersistEventsTask ) -> Dict[str, str]: """Callback for the _event_persist_queue @@ -453,7 +525,14 @@ class EventsPersistenceStorageController: Returns: A dictionary of event ID to event ID we didn't persist as we already had another event persisted with the same TXN ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ + events_and_contexts = task.events_and_contexts + backfilled = task.backfilled + replaced_events: Dict[str, str] = {} if not events_and_contexts: return replaced_events @@ -869,7 +948,8 @@ class EventsPersistenceStorageController: events_context, ) - return res.state, None, new_latest_event_ids + full_state = await res.get_state(self._state_controller) + return full_state, None, new_latest_event_ids async def _prune_extremities( self, diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index fb54826bd7..01259980c2 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -348,7 +348,7 @@ class StateStorageController: room_id: str, prev_group: Optional[int], delta_ids: Optional[StateMap[str]], - current_state_ids: StateMap[str], + current_state_ids: Optional[StateMap[str]], ) -> int: """Store a new set of state, returning a newly assigned state group. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e8c63cf567..b394a6658b 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -23,6 +23,7 @@ from time import monotonic as monotonic_time from typing import ( TYPE_CHECKING, Any, + Awaitable, Callable, Collection, Dict, @@ -38,7 +39,7 @@ from typing import ( ) import attr -from prometheus_client import Histogram +from prometheus_client import Counter, Histogram from typing_extensions import Concatenate, Literal, ParamSpec from twisted.enterprise import adbapi @@ -75,7 +76,8 @@ perf_logger = logging.getLogger("synapse.storage.TIME") sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec") sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) -sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) +sql_txn_count = Counter("synapse_storage_transaction_time_count", "sec", ["desc"]) +sql_txn_duration = Counter("synapse_storage_transaction_time_sum", "sec", ["desc"]) # Unique indexes which have been added in background updates. Maps from table name @@ -168,6 +170,7 @@ class LoggingDatabaseConnection: *, txn_name: Optional[str] = None, after_callbacks: Optional[List["_CallbackListEntry"]] = None, + async_after_callbacks: Optional[List["_AsyncCallbackListEntry"]] = None, exception_callbacks: Optional[List["_CallbackListEntry"]] = None, ) -> "LoggingTransaction": if not txn_name: @@ -178,6 +181,7 @@ class LoggingDatabaseConnection: name=txn_name, database_engine=self.engine, after_callbacks=after_callbacks, + async_after_callbacks=async_after_callbacks, exception_callbacks=exception_callbacks, ) @@ -209,6 +213,9 @@ class LoggingDatabaseConnection: # The type of entry which goes on our after_callbacks and exception_callbacks lists. _CallbackListEntry = Tuple[Callable[..., object], Tuple[object, ...], Dict[str, object]] +_AsyncCallbackListEntry = Tuple[ + Callable[..., Awaitable], Tuple[object, ...], Dict[str, object] +] P = ParamSpec("P") R = TypeVar("R") @@ -227,6 +234,10 @@ class LoggingTransaction: that have been added by `call_after` which should be run on successful completion of the transaction. None indicates that no callbacks should be allowed to be scheduled to run. + async_after_callbacks: A list that asynchronous callbacks will be appended + to by `async_call_after` which should run, before after_callbacks, on + successful completion of the transaction. None indicates that no + callbacks should be allowed to be scheduled to run. exception_callbacks: A list that callbacks will be appended to that have been added by `call_on_exception` which should be run if transaction ends with an error. None indicates that no callbacks @@ -238,6 +249,7 @@ class LoggingTransaction: "name", "database_engine", "after_callbacks", + "async_after_callbacks", "exception_callbacks", ] @@ -247,12 +259,14 @@ class LoggingTransaction: name: str, database_engine: BaseDatabaseEngine, after_callbacks: Optional[List[_CallbackListEntry]] = None, + async_after_callbacks: Optional[List[_AsyncCallbackListEntry]] = None, exception_callbacks: Optional[List[_CallbackListEntry]] = None, ): self.txn = txn self.name = name self.database_engine = database_engine self.after_callbacks = after_callbacks + self.async_after_callbacks = async_after_callbacks self.exception_callbacks = exception_callbacks def call_after( @@ -277,6 +291,28 @@ class LoggingTransaction: # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 self.after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + def async_call_after( + self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs + ) -> None: + """Call the given asynchronous callback on the main twisted thread after + the transaction has finished (but before those added in `call_after`). + + Mostly used to invalidate remote caches after transactions. + + Note that transactions may be retried a few times if they encounter database + errors such as serialization failures. Callbacks given to `async_call_after` + will accumulate across transaction attempts and will _all_ be called once a + transaction attempt succeeds, regardless of whether previous transaction + attempts failed. Otherwise, if all transaction attempts fail, all + `call_on_exception` callbacks will be run instead. + """ + # if self.async_after_callbacks is None, that means that whatever constructed the + # LoggingTransaction isn't expecting there to be any callbacks; assert that + # is not the case. + assert self.async_after_callbacks is not None + # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 + self.async_after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + def call_on_exception( self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs ) -> None: @@ -366,10 +402,11 @@ class LoggingTransaction: *args: P.args, **kwargs: P.kwargs, ) -> R: - sql = self._make_sql_one_line(sql) + # Generate a one-line version of the SQL to better log it. + one_line_sql = self._make_sql_one_line(sql) # TODO(paul): Maybe use 'info' and 'debug' for values? - sql_logger.debug("[SQL] {%s} %s", self.name, sql) + sql_logger.debug("[SQL] {%s} %s", self.name, one_line_sql) sql = self.database_engine.convert_param_style(sql) if args: @@ -389,7 +426,7 @@ class LoggingTransaction: "db.query", tags={ opentracing.tags.DATABASE_TYPE: "sql", - opentracing.tags.DATABASE_STATEMENT: sql, + opentracing.tags.DATABASE_STATEMENT: one_line_sql, }, ): return func(sql, *args, **kwargs) @@ -573,6 +610,7 @@ class DatabasePool: conn: LoggingDatabaseConnection, desc: str, after_callbacks: List[_CallbackListEntry], + async_after_callbacks: List[_AsyncCallbackListEntry], exception_callbacks: List[_CallbackListEntry], func: Callable[Concatenate[LoggingTransaction, P], R], *args: P.args, @@ -596,6 +634,7 @@ class DatabasePool: conn desc after_callbacks + async_after_callbacks exception_callbacks func *args @@ -658,6 +697,7 @@ class DatabasePool: cursor = conn.cursor( txn_name=name, after_callbacks=after_callbacks, + async_after_callbacks=async_after_callbacks, exception_callbacks=exception_callbacks, ) try: @@ -756,7 +796,8 @@ class DatabasePool: self._current_txn_total_time += duration self._txn_perf_counters.update(desc, duration) - sql_txn_timer.labels(desc).observe(duration) + sql_txn_count.labels(desc).inc(1) + sql_txn_duration.labels(desc).inc(duration) async def runInteraction( self, @@ -797,6 +838,7 @@ class DatabasePool: async def _runInteraction() -> R: after_callbacks: List[_CallbackListEntry] = [] + async_after_callbacks: List[_AsyncCallbackListEntry] = [] exception_callbacks: List[_CallbackListEntry] = [] if not current_context(): @@ -808,6 +850,7 @@ class DatabasePool: self.new_transaction, desc, after_callbacks, + async_after_callbacks, exception_callbacks, func, *args, @@ -816,13 +859,17 @@ class DatabasePool: **kwargs, ) + # We order these assuming that async functions call out to external + # systems (e.g. to invalidate a cache) and the sync functions make these + # changes on any local in-memory caches/similar, and thus must be second. + for async_callback, async_args, async_kwargs in async_after_callbacks: + await async_callback(*async_args, **async_kwargs) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) - return cast(R, result) except Exception: - for after_callback, after_args, after_kwargs in exception_callbacks: - after_callback(*after_args, **after_kwargs) + for exception_callback, after_args, after_kwargs in exception_callbacks: + exception_callback(*after_args, **after_kwargs) raise # To handle cancellation, we ensure that `after_callback`s and diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 57aaf778ec..4dccbb732a 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -24,9 +24,9 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.main.stats import UserSortOrder -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -87,7 +87,6 @@ class DataStore( RoomStore, RoomBatchStore, RegistrationStore, - StreamWorkerStore, ProfileStore, PresenceStore, TransactionWorkerStore, @@ -112,6 +111,7 @@ class DataStore( SearchStore, TagsStore, AccountDataStore, + StreamWorkerStore, OpenIdStore, ClientIpWorkerStore, DeviceStore, @@ -149,31 +149,6 @@ class DataStore( ], ) - self._cache_id_gen: Optional[MultiWriterIdGenerator] - if isinstance(self.database_engine, PostgresEngine): - # We set the `writers` to an empty list here as we don't care about - # missing updates over restarts, as we'll not have anything in our - # caches to invalidate. (This reduces the amount of writes to the DB - # that happen). - self._cache_id_gen = MultiWriterIdGenerator( - db_conn, - database, - stream_name="caches", - instance_name=hs.get_instance_name(), - tables=[ - ( - "cache_invalidation_stream_by_instance", - "instance_name", - "stream_id", - ) - ], - sequence_name="cache_invalidation_stream_seq", - writers=[], - ) - - else: - self._cache_id_gen = None - super().__init__(database, db_conn, hs) events_max = self._stream_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index e284454b66..64b70a7b28 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -371,52 +371,30 @@ class ApplicationServiceTransactionWorkerStore( device_list_summary=DeviceListUpdates(), ) - async def set_appservice_last_pos(self, pos: int) -> None: - def set_appservice_last_pos_txn(txn: LoggingTransaction) -> None: - txn.execute( - "UPDATE appservice_stream_position SET stream_ordering = ?", (pos,) - ) + async def get_appservice_last_pos(self) -> int: + """ + Get the last stream ordering position for the appservice process. + """ - await self.db_pool.runInteraction( - "set_appservice_last_pos", set_appservice_last_pos_txn + return await self.db_pool.simple_select_one_onecol( + table="appservice_stream_position", + retcol="stream_ordering", + keyvalues={}, + desc="get_appservice_last_pos", ) - async def get_new_events_for_appservice( - self, current_id: int, limit: int - ) -> Tuple[int, List[EventBase]]: - """Get all new events for an appservice""" - - def get_new_events_for_appservice_txn( - txn: LoggingTransaction, - ) -> Tuple[int, List[str]]: - sql = ( - "SELECT e.stream_ordering, e.event_id" - " FROM events AS e" - " WHERE" - " (SELECT stream_ordering FROM appservice_stream_position)" - " < e.stream_ordering" - " AND e.stream_ordering <= ?" - " ORDER BY e.stream_ordering ASC" - " LIMIT ?" - ) - - txn.execute(sql, (current_id, limit)) - rows = txn.fetchall() - - upper_bound = current_id - if len(rows) == limit: - upper_bound = rows[-1][0] - - return upper_bound, [row[1] for row in rows] + async def set_appservice_last_pos(self, pos: int) -> None: + """ + Set the last stream ordering position for the appservice process. + """ - upper_bound, event_ids = await self.db_pool.runInteraction( - "get_new_events_for_appservice", get_new_events_for_appservice_txn + await self.db_pool.simple_update_one( + table="appservice_stream_position", + keyvalues={}, + updatevalues={"stream_ordering": pos}, + desc="set_appservice_last_pos", ) - events = await self.get_events_as_list(event_ids, get_prev_content=True) - - return upper_bound, events - async def get_type_stream_id_for_appservice( self, service: ApplicationService, type: str ) -> int: diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 1653a6a9b6..12e9a42382 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -32,6 +32,7 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.engines import PostgresEngine +from synapse.storage.util.id_generators import MultiWriterIdGenerator from synapse.util.caches.descriptors import _CachedFunction from synapse.util.iterutils import batch_iter @@ -65,6 +66,31 @@ class CacheInvalidationWorkerStore(SQLBaseStore): psql_only=True, # The table is only on postgres DBs. ) + self._cache_id_gen: Optional[MultiWriterIdGenerator] + if isinstance(self.database_engine, PostgresEngine): + # We set the `writers` to an empty list here as we don't care about + # missing updates over restarts, as we'll not have anything in our + # caches to invalidate. (This reduces the amount of writes to the DB + # that happen). + self._cache_id_gen = MultiWriterIdGenerator( + db_conn, + database, + stream_name="caches", + instance_name=hs.get_instance_name(), + tables=[ + ( + "cache_invalidation_stream_by_instance", + "instance_name", + "stream_id", + ) + ], + sequence_name="cache_invalidation_stream_seq", + writers=[], + ) + + else: + self._cache_id_gen = None + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: @@ -193,7 +219,10 @@ class CacheInvalidationWorkerStore(SQLBaseStore): relates_to: Optional[str], backfilled: bool, ) -> None: - self._invalidate_get_event_cache(event_id) + # This invalidates any local in-memory cached event objects, the original + # process triggering the invalidation is responsible for clearing any external + # cached objects. + self._invalidate_local_get_event_cache(event_id) self.have_seen_event.invalidate((room_id, event_id)) self.get_latest_event_ids_in_room.invalidate((room_id,)) @@ -208,7 +237,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): self._events_stream_cache.entity_has_changed(room_id, stream_ordering) if redacts: - self._invalidate_get_event_cache(redacts) + self._invalidate_local_get_event_cache(redacts) # Caches which might leak edits must be invalidated for the event being # redacted. self.get_relations_for_event.invalidate((redacts,)) diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py index fd3fc298b3..58177ecec1 100644 --- a/synapse/storage/databases/main/censor_events.py +++ b/synapse/storage/databases/main/censor_events.py @@ -194,7 +194,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase # changed its content in the database. We can't call # self._invalidate_cache_and_stream because self.get_event_cache isn't of the # right type. - txn.call_after(self._get_event_cache.invalidate, (event.event_id,)) + self.invalidate_get_event_cache_after_txn(txn, event.event_id) # Send that invalidation to replication so that other workers also invalidate # the event cache. self._send_invalidation_to_replication( diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 422e0e65ca..73c95ffb6f 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -436,7 +436,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): (user_id, device_id), None ) - set_tag("last_deleted_stream_id", last_deleted_stream_id) + set_tag("last_deleted_stream_id", str(last_deleted_stream_id)) if last_deleted_stream_id: has_changed = self._device_inbox_stream_cache.has_entity_changed( diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 03d1334e03..ca0fe8c4be 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -669,7 +669,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): @trace async def get_user_devices_from_cache( - self, query_list: List[Tuple[str, str]] + self, query_list: List[Tuple[str, Optional[str]]] ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. @@ -706,8 +706,8 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): else: results[user_id] = await self.get_cached_devices_for_user(user_id) - set_tag("in_cache", results) - set_tag("not_in_cache", user_ids_not_in_cache) + set_tag("in_cache", str(results)) + set_tag("not_in_cache", str(user_ids_not_in_cache)) return user_ids_not_in_cache, results @@ -1208,6 +1208,65 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): return devices + @cached() + async def _get_min_device_lists_changes_in_room(self) -> int: + """Returns the minimum stream ID that we have entries for + `device_lists_changes_in_room` + """ + + return await self.db_pool.simple_select_one_onecol( + table="device_lists_changes_in_room", + keyvalues={}, + retcol="COALESCE(MIN(stream_id), 0)", + desc="get_min_device_lists_changes_in_room", + ) + + async def get_device_list_changes_in_rooms( + self, room_ids: Collection[str], from_id: int + ) -> Optional[Set[str]]: + """Return the set of users whose devices have changed in the given rooms + since the given stream ID. + + Returns None if the given stream ID is too old. + """ + + if not room_ids: + return set() + + min_stream_id = await self._get_min_device_lists_changes_in_room() + + if min_stream_id > from_id: + return None + + sql = """ + SELECT DISTINCT user_id FROM device_lists_changes_in_room + WHERE {clause} AND stream_id >= ? + """ + + def _get_device_list_changes_in_rooms_txn( + txn: LoggingTransaction, + clause: str, + args: List[Any], + ) -> Set[str]: + txn.execute(sql.format(clause=clause), args) + return {user_id for user_id, in txn} + + changes = set() + for chunk in batch_iter(room_ids, 1000): + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", chunk + ) + args.append(from_id) + + changes |= await self.db_pool.runInteraction( + "get_device_list_changes_in_rooms", + _get_device_list_changes_in_rooms_txn, + clause, + args, + ) + + return changes + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 9b293475c8..46c0d06157 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -22,11 +22,14 @@ from typing import ( List, Optional, Tuple, + Union, cast, + overload, ) import attr from canonicaljson import encode_canonical_json +from typing_extensions import Literal from synapse.api.constants import DeviceKeyAlgorithms from synapse.appservice import ( @@ -113,7 +116,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker user_devices = devices[user_id] results = [] for device_id, device in user_devices.items(): - result = {"device_id": device_id} + result: JsonDict = {"device_id": device_id} keys = device.keys if keys: @@ -143,7 +146,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker key data. The key data will be a dict in the same format as the DeviceKeys type returned by POST /_matrix/client/r0/keys/query. """ - set_tag("query_list", query_list) + set_tag("query_list", str(query_list)) if not query_list: return {} @@ -156,6 +159,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker rv[user_id] = {} for device_id, device_info in device_keys.items(): r = device_info.keys + if r is None: + continue + r["unsigned"] = {} display_name = device_info.display_name if display_name is not None: @@ -164,13 +170,42 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return rv + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: Literal[False] = False, + ) -> Dict[str, Dict[str, DeviceKeyLookupResult]]: + ... + + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: bool = False, + include_deleted_devices: Literal[False] = False, + ) -> Dict[str, Dict[str, DeviceKeyLookupResult]]: + ... + + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: Literal[True], + include_deleted_devices: Literal[True], + ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]: + ... + @trace async def get_e2e_device_keys_and_signatures( self, - query_list: List[Tuple[str, Optional[str]]], + query_list: Collection[Tuple[str, Optional[str]]], include_all_devices: bool = False, include_deleted_devices: bool = False, - ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]: + ) -> Union[ + Dict[str, Dict[str, DeviceKeyLookupResult]], + Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]], + ]: """Fetch a list of device keys Any cross-signatures made on the keys by the owner of the device are also @@ -383,7 +418,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker def _add_e2e_one_time_keys(txn: LoggingTransaction) -> None: set_tag("user_id", user_id) set_tag("device_id", device_id) - set_tag("new_keys", new_keys) + set_tag("new_keys", str(new_keys)) # We are protected from race between lookup and insertion due to # a unique constraint. If there is a race of two calls to # `add_e2e_one_time_keys` then they'll conflict and we will only @@ -1044,7 +1079,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker _claim_e2e_one_time_key = _claim_e2e_one_time_key_simple db_autocommit = False - row = await self.db_pool.runInteraction( + claim_row = await self.db_pool.runInteraction( "claim_e2e_one_time_keys", _claim_e2e_one_time_key, user_id, @@ -1052,11 +1087,11 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker algorithm, db_autocommit=db_autocommit, ) - if row: + if claim_row: device_results = results.setdefault(user_id, {}).setdefault( device_id, {} ) - device_results[row[0]] = row[1] + device_results[claim_row[0]] = claim_row[1] continue # No one-time key available, so see if there's a fallback @@ -1126,7 +1161,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): set_tag("user_id", user_id) set_tag("device_id", device_id) set_tag("time_now", time_now) - set_tag("device_keys", device_keys) + set_tag("device_keys", str(device_keys)) old_key_json = self.db_pool.simple_select_one_onecol_txn( txn, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index ae705889a5..dd2627037c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -25,8 +25,8 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -122,7 +122,7 @@ def _deserialize_action(actions: str, is_highlight: bool) -> List[Union[dict, st return DEFAULT_NOTIF_ACTION -class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBaseStore): +class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ -143,12 +143,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas self._find_stream_orderings_for_times, 10 * 60 * 1000 ) - self._rotate_delay = 3 self._rotate_count = 10000 self._doing_notif_rotation = False if hs.config.worker.run_background_tasks: self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 + self._rotate_notifs, 30 * 1000 ) self.db_pool.updates.register_background_index_update( @@ -218,7 +217,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas retcol="event_id", ) - stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) # type: ignore[attr-defined] + stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) return self._get_unread_counts_by_pos_txn( txn, room_id, user_id, stream_ordering @@ -233,14 +232,30 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas counts = NotifCounts() - # First we pull the counts from the summary table + # First we pull the counts from the summary table. + # + # We check that `last_receipt_stream_ordering` matches the stream + # ordering given. If it doesn't match then a new read receipt has arrived and + # we haven't yet updated the counts in `event_push_summary` to reflect + # that; in that case we simply ignore `event_push_summary` counts + # and do a manual count of all of the rows in the `event_push_actions` table + # for this user/room. + # + # If `last_receipt_stream_ordering` is null then that means it's up to + # date (as the row was written by an older version of Synapse that + # updated `event_push_summary` synchronously when persisting a new read + # receipt). txn.execute( """ SELECT stream_ordering, notif_count, COALESCE(unread_count, 0) FROM event_push_summary - WHERE room_id = ? AND user_id = ? AND stream_ordering > ? + WHERE room_id = ? AND user_id = ? + AND ( + (last_receipt_stream_ordering IS NULL AND stream_ordering > ?) + OR last_receipt_stream_ordering = ? + ) """, - (room_id, user_id, stream_ordering), + (room_id, user_id, stream_ordering, stream_ordering), ) row = txn.fetchone() @@ -263,9 +278,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas if row: counts.highlight_count += row[0] - # Finally we need to count push actions that haven't been summarized - # yet. - # We only want to pull out push actions that we haven't summarized yet. + # Finally we need to count push actions that aren't included in the + # summary returned above, e.g. recent events that haven't been + # summarized yet, or the summary is empty due to a recent read receipt. stream_ordering = max(stream_ordering, summary_stream_ordering) notify_count, unread_count = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering @@ -291,12 +306,22 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas actions that have been deleted from `event_push_actions` table. """ + # If there have been no events in the room since the stream ordering, + # there can't be any push actions either. + if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): + return 0, 0 + clause = "" args = [user_id, room_id, stream_ordering] if max_stream_ordering is not None: clause = "AND ea.stream_ordering <= ?" args.append(max_stream_ordering) + # If the max stream ordering is less than the min stream ordering, + # then obviously there are zero push actions in that range. + if max_stream_ordering <= stream_ordering: + return 0, 0 + sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), @@ -800,6 +825,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas self._doing_notif_rotation = True try: + # First we recalculate push summaries and delete stale push actions + # for rooms/users with new receipts. + while True: + logger.debug("Handling new receipts") + + caught_up = await self.db_pool.runInteraction( + "_handle_new_receipts_for_notifs_txn", + self._handle_new_receipts_for_notifs_txn, + ) + if caught_up: + break + + # Then we update the event push summaries for any new events while True: logger.info("Rotating notifications") @@ -808,12 +846,115 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas ) if caught_up: break - await self.hs.get_clock().sleep(self._rotate_delay) + # Finally we clear out old event push actions. await self._remove_old_push_actions_that_have_rotated() finally: self._doing_notif_rotation = False + def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool: + """Check for new read receipts and delete from event push actions. + + Any push actions which predate the user's most recent read receipt are + now redundant, so we can remove them from `event_push_actions` and + update `event_push_summary`. + """ + + limit = 100 + + min_receipts_stream_id = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_last_receipt_stream_id", + keyvalues={}, + retcol="stream_id", + ) + + max_receipts_stream_id = self._receipts_id_gen.get_current_token() + + sql = """ + SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering + FROM receipts_linearized AS r + INNER JOIN events AS e USING (event_id) + WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ? + ORDER BY r.stream_id ASC + LIMIT ? + """ + + # We only want local users, so we add a dodgy filter to the above query + # and recheck it below. + user_filter = "%:" + self.hs.hostname + + txn.execute( + sql, + ( + min_receipts_stream_id, + max_receipts_stream_id, + user_filter, + limit, + ), + ) + rows = txn.fetchall() + + old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_stream_ordering", + keyvalues={}, + retcol="stream_ordering", + ) + + # For each new read receipt we delete push actions from before it and + # recalculate the summary. + for _, room_id, user_id, stream_ordering in rows: + # Only handle our own read receipts. + if not self.hs.is_mine_id(user_id): + continue + + txn.execute( + """ + DELETE FROM event_push_actions + WHERE room_id = ? + AND user_id = ? + AND stream_ordering <= ? + AND highlight = 0 + """, + (room_id, user_id, stream_ordering), + ) + + notif_count, unread_count = self._get_notif_unread_count_for_user_room( + txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering + ) + + self.db_pool.simple_upsert_txn( + txn, + table="event_push_summary", + keyvalues={"room_id": room_id, "user_id": user_id}, + values={ + "notif_count": notif_count, + "unread_count": unread_count, + "stream_ordering": old_rotate_stream_ordering, + "last_receipt_stream_ordering": stream_ordering, + }, + ) + + # We always update `event_push_summary_last_receipt_stream_id` to + # ensure that we don't rescan the same receipts for remote users. + + upper_limit = max_receipts_stream_id + if len(rows) >= limit: + # If we pulled out a limited number of rows we only update the + # position to the last receipt we processed, so we continue + # processing the rest next iteration. + upper_limit = rows[-1][0] + + self.db_pool.simple_update_txn( + txn, + table="event_push_summary_last_receipt_stream_id", + keyvalues={}, + updatevalues={"stream_id": upper_limit}, + ) + + return len(rows) < limit + def _rotate_notifs_txn(self, txn: LoggingTransaction) -> bool: """Archives older notifications into event_push_summary. Returns whether the archiving process has caught up or not. @@ -839,7 +980,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas stream_row = txn.fetchone() if stream_row: (offset_stream_ordering,) = stream_row - rotate_to_stream_ordering = offset_stream_ordering + + # We need to bound by the current token to ensure that we handle + # out-of-order writes correctly. + rotate_to_stream_ordering = min( + offset_stream_ordering, self._stream_id_gen.get_current_token() + ) caught_up = False else: rotate_to_stream_ordering = self._stream_id_gen.get_current_token() @@ -865,13 +1011,17 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas sql = """ SELECT user_id, room_id, coalesce(old.%s, 0) + upd.cnt, - upd.stream_ordering, - old.user_id + upd.stream_ordering FROM ( SELECT user_id, room_id, count(*) as cnt, - max(stream_ordering) as stream_ordering - FROM event_push_actions - WHERE ? <= stream_ordering AND stream_ordering < ? + max(ea.stream_ordering) as stream_ordering + FROM event_push_actions AS ea + LEFT JOIN event_push_summary AS old USING (user_id, room_id) + WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ? + AND ( + old.last_receipt_stream_ordering IS NULL + OR old.last_receipt_stream_ordering < ea.stream_ordering + ) AND %s = 1 GROUP BY user_id, room_id ) AS upd @@ -894,7 +1044,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=row[2], stream_ordering=row[3], - old_user_id=row[4], notif_count=0, ) @@ -915,57 +1064,27 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=0, stream_ordering=row[3], - old_user_id=row[4], notif_count=row[2], ) logger.info("Rotating notifications, handling %d rows", len(summaries)) - # If the `old.user_id` above is NULL then we know there isn't already an - # entry in the table, so we simply insert it. Otherwise we update the - # existing table. - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - keys=( - "user_id", - "room_id", - "notif_count", - "unread_count", - "stream_ordering", - ), - values=[ + key_names=("user_id", "room_id"), + key_values=[(user_id, room_id) for user_id, room_id in summaries], + value_names=("notif_count", "unread_count", "stream_ordering"), + value_values=[ ( - user_id, - room_id, summary.notif_count, summary.unread_count, summary.stream_ordering, ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is None + for summary in summaries.values() ], ) - txn.execute_batch( - """ - UPDATE event_push_summary - SET notif_count = ?, unread_count = ?, stream_ordering = ? - WHERE user_id = ? AND room_id = ? - """, - ( - ( - summary.notif_count, - summary.unread_count, - summary.stream_ordering, - user_id, - room_id, - ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is not None - ), - ) - txn.execute( "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?", (rotate_to_stream_ordering,), @@ -993,12 +1112,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas ) -> bool: # We don't want to clear out too much at a time, so we bound our # deletes. - batch_size = 10000 + batch_size = self._rotate_count txn.execute( """ SELECT stream_ordering FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? """, ( @@ -1013,10 +1132,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas else: stream_ordering = max_stream_ordering_to_delete + # We need to use a inclusive bound here to handle the case where a + # single stream ordering has more than `batch_size` rows. txn.execute( """ DELETE FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 """, (stream_ordering,), ) @@ -1033,66 +1154,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas if done: break - def _remove_old_push_actions_before_txn( - self, txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int - ) -> None: - """ - Purges old push actions for a user and room before a given - stream_ordering. - - We however keep a months worth of highlighted notifications, so that - users can still get a list of recent highlights. - - Args: - txn: The transaction - room_id: Room ID to delete from - user_id: user ID to delete for - stream_ordering: The lowest stream ordering which will - not be deleted. - """ - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate, - (room_id, user_id), - ) - - # We need to join on the events table to get the received_ts for - # event_push_actions and sqlite won't let us use a join in a delete so - # we can't just delete where received_ts < x. Furthermore we can - # only identify event_push_actions by a tuple of room_id, event_id - # we we can't use a subquery. - # Instead, we look up the stream ordering for the last event in that - # room received before the threshold time and delete event_push_actions - # in the room with a stream_odering before that. - txn.execute( - "DELETE FROM event_push_actions " - " WHERE user_id = ? AND room_id = ? AND " - " stream_ordering <= ?" - " AND ((stream_ordering < ? AND highlight = 1) or highlight = 0)", - (user_id, room_id, stream_ordering, self.stream_ordering_month_ago), - ) - - old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_push_summary_stream_ordering", - keyvalues={}, - retcol="stream_ordering", - ) - - notif_count, unread_count = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering - ) - - self.db_pool.simple_upsert_txn( - txn, - table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id}, - values={ - "notif_count": notif_count, - "unread_count": unread_count, - "stream_ordering": old_rotate_stream_ordering, - }, - ) - class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -1120,6 +1181,16 @@ class EventPushActionsStore(EventPushActionsWorkerStore): where_clause="highlight=1", ) + # Add index to make deleting old push actions faster. + self.db_pool.updates.register_background_index_update( + "event_push_actions_stream_highlight_index", + index_name="event_push_actions_stream_highlight_index", + table="event_push_actions", + columns=["highlight", "stream_ordering"], + where_clause="highlight=0", + psql_only=True, + ) + async def get_push_actions_for_user( self, user_id: str, @@ -1195,5 +1266,4 @@ class _EventPushSummary: unread_count: int stream_ordering: int - old_user_id: str notif_count: int diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a3e12f1e9b..1f600f1190 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -16,6 +16,7 @@ import itertools import logging from collections import OrderedDict +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -35,6 +36,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -69,6 +71,24 @@ event_counter = Counter( ) +class PartialStateConflictError(SynapseError): + """An internal error raised when attempting to persist an event with partial state + after the room containing the event has been un-partial stated. + + This error should be handled by recomputing the event context and trying again. + + This error has an HTTP status code so that it can be transported over replication. + It should not be exposed to clients. + """ + + def __init__(self) -> None: + super().__init__( + HTTPStatus.CONFLICT, + msg="Cannot persist partial state event in un-partial stated room", + errcode=Codes.UNKNOWN, + ) + + @attr.s(slots=True, auto_attribs=True) class DeltaState: """Deltas to use to update the `current_state_events` table. @@ -154,6 +174,10 @@ class PersistEventsStore: Returns: Resolves when the events have been persisted + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # We want to calculate the stream orderings as late as possible, as @@ -354,6 +378,9 @@ class PersistEventsStore: For each room, a list of the event ids which are the forward extremities. + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ state_delta_for_room = state_delta_for_room or {} new_forward_extremities = new_forward_extremities or {} @@ -980,16 +1007,16 @@ class PersistEventsStore: self, room_id: str, state_delta: DeltaState, - stream_id: int, ) -> None: """Update the current state stored in the datatabase for the given room""" - await self.db_pool.runInteraction( - "update_current_state", - self._update_current_state_txn, - state_delta_by_room={room_id: state_delta}, - stream_id=stream_id, - ) + async with self._stream_id_gen.get_next() as stream_ordering: + await self.db_pool.runInteraction( + "update_current_state", + self._update_current_state_txn, + state_delta_by_room={room_id: state_delta}, + stream_id=stream_ordering, + ) def _update_current_state_txn( self, @@ -1266,7 +1293,7 @@ class PersistEventsStore: depth_updates: Dict[str, int] = {} for event, context in events_and_contexts: # Remove the any existing cache entries for the event_ids - txn.call_after(self.store._invalidate_get_event_cache, event.event_id) + self.store.invalidate_get_event_cache_after_txn(txn, event.event_id) # Then update the `stream_ordering` position to mark the latest # event as the front of the room. This should not be done for # backfilled events because backfilled events have negative @@ -1304,6 +1331,10 @@ class PersistEventsStore: Returns: new list, without events which are already in the events table. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ txn.execute( "SELECT event_id, outlier FROM events WHERE event_id in (%s)" @@ -1315,9 +1346,24 @@ class PersistEventsStore: event_id: outlier for event_id, outlier in txn } + logger.debug( + "_update_outliers_txn: events=%s have_persisted=%s", + [ev.event_id for ev, _ in events_and_contexts], + have_persisted, + ) + to_remove = set() for event, context in events_and_contexts: - if event.event_id not in have_persisted: + outlier_persisted = have_persisted.get(event.event_id) + logger.debug( + "_update_outliers_txn: event=%s outlier=%s outlier_persisted=%s", + event.event_id, + event.internal_metadata.is_outlier(), + outlier_persisted, + ) + + # Ignore events which we haven't persisted at all + if outlier_persisted is None: continue to_remove.add(event) @@ -1327,7 +1373,6 @@ class PersistEventsStore: # was an outlier or not - what we have is at least as good. continue - outlier_persisted = have_persisted[event.event_id] if not event.internal_metadata.is_outlier() and outlier_persisted: # We received a copy of an event that we had already stored as # an outlier in the database. We now have some state at that event @@ -1338,7 +1383,10 @@ class PersistEventsStore: # events down /sync. In general they will be historical events, so that # doesn't matter too much, but that is not always the case. - logger.info("Updating state for ex-outlier event %s", event.event_id) + logger.info( + "_update_outliers_txn: Updating state for ex-outlier event %s", + event.event_id, + ) # insert into event_to_state_groups. try: @@ -1638,13 +1686,13 @@ class PersistEventsStore: if not row["rejects"] and not row["redacts"]: to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) - def prefill() -> None: + async def prefill() -> None: for cache_entry in to_prefill: - self.store._get_event_cache.set( + await self.store._get_event_cache.set( (cache_entry.event.event_id,), cache_entry ) - txn.call_after(prefill) + txn.async_call_after(prefill) def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None: """Invalidate the caches for the redacted event. @@ -1653,7 +1701,7 @@ class PersistEventsStore: _invalidate_caches_for_event. """ assert event.redacts is not None - txn.call_after(self.store._invalidate_get_event_cache, event.redacts) + self.store.invalidate_get_event_cache_after_txn(txn, event.redacts) txn.call_after(self.store.get_relations_for_event.invalidate, (event.redacts,)) txn.call_after(self.store.get_applicable_edit.invalidate, (event.redacts,)) @@ -1766,6 +1814,18 @@ class PersistEventsStore: self.store.get_invited_rooms_for_local_user.invalidate, (event.state_key,), ) + txn.call_after( + self.store.get_local_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_number_joined_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_user_in_room_with_profile.invalidate, + (event.room_id, event.state_key), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. @@ -2215,6 +2275,11 @@ class PersistEventsStore: txn: LoggingTransaction, events_and_contexts: Collection[Tuple[EventBase, EventContext]], ) -> None: + """ + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. + """ state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): @@ -2239,19 +2304,37 @@ class PersistEventsStore: # if we have partial state for these events, record the fact. (This happens # here rather than in _store_event_txn because it also needs to happen when # we de-outlier an event.) - self.db_pool.simple_insert_many_txn( - txn, - table="partial_state_events", - keys=("room_id", "event_id"), - values=[ - ( - event.room_id, - event.event_id, - ) - for event, ctx in events_and_contexts - if ctx.partial_state - ], - ) + try: + self.db_pool.simple_insert_many_txn( + txn, + table="partial_state_events", + keys=("room_id", "event_id"), + values=[ + ( + event.room_id, + event.event_id, + ) + for event, ctx in events_and_contexts + if ctx.partial_state + ], + ) + except self.db_pool.engine.module.IntegrityError: + logger.info( + "Cannot persist events %s in rooms %s: room has been un-partial stated", + [ + event.event_id + for event, ctx in events_and_contexts + if ctx.partial_state + ], + list( + { + event.room_id + for event, ctx in events_and_contexts + if ctx.partial_state + } + ), + ) + raise PartialStateConflictError() self.db_pool.simple_upsert_many_txn( txn, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index eeca85fc94..6e8aeed7b4 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -67,6 +67,8 @@ class _BackgroundUpdates: EVENT_EDGES_DROP_INVALID_ROWS = "event_edges_drop_invalid_rows" EVENT_EDGES_REPLACE_INDEX = "event_edges_replace_index" + EVENTS_POPULATE_STATE_KEY_REJECTIONS = "events_populate_state_key_rejections" + @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -253,6 +255,11 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): replaces_index="ev_edges_id", ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS, + self._background_events_populate_state_key_rejections, + ) + async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -1399,3 +1406,83 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) return batch_size + + async def _background_events_populate_state_key_rejections( + self, progress: JsonDict, batch_size: int + ) -> int: + """Back-populate `events.state_key` and `events.rejection_reason""" + + min_stream_ordering_exclusive = progress["min_stream_ordering_exclusive"] + max_stream_ordering_inclusive = progress["max_stream_ordering_inclusive"] + + def _populate_txn(txn: LoggingTransaction) -> bool: + """Returns True if we're done.""" + + # first we need to find an endpoint. + # we need to find the final row in the batch of batch_size, which means + # we need to skip over (batch_size-1) rows and get the next row. + txn.execute( + """ + SELECT stream_ordering FROM events + WHERE stream_ordering > ? AND stream_ordering <= ? + ORDER BY stream_ordering + LIMIT 1 OFFSET ? + """, + ( + min_stream_ordering_exclusive, + max_stream_ordering_inclusive, + batch_size - 1, + ), + ) + + endpoint = None + row = txn.fetchone() + if row: + endpoint = row[0] + + where_clause = "stream_ordering > ?" + args = [min_stream_ordering_exclusive] + if endpoint: + where_clause += " AND stream_ordering <= ?" + args.append(endpoint) + + # now do the updates. + txn.execute( + f""" + UPDATE events + SET state_key = (SELECT state_key FROM state_events se WHERE se.event_id = events.event_id), + rejection_reason = (SELECT reason FROM rejections rej WHERE rej.event_id = events.event_id) + WHERE ({where_clause}) + """, + args, + ) + + logger.info( + "populated new `events` columns up to %s/%i: updated %i rows", + endpoint, + max_stream_ordering_inclusive, + txn.rowcount, + ) + + if endpoint is None: + # we're done + return True + + progress["min_stream_ordering_exclusive"] = endpoint + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS, + progress, + ) + return False + + done = await self.db_pool.runInteraction( + desc="events_populate_state_key_rejections", func=_populate_txn + ) + + if done: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS + ) + + return batch_size diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b99b107784..5914a35420 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -79,7 +79,7 @@ from synapse.types import JsonDict, get_domain_from_id from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation from synapse.util.caches.descriptors import cached, cachedList -from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.lrucache import AsyncLruCache from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure @@ -238,7 +238,9 @@ class EventsWorkerStore(SQLBaseStore): 5 * 60 * 1000, ) - self._get_event_cache: LruCache[Tuple[str], EventCacheEntry] = LruCache( + self._get_event_cache: AsyncLruCache[ + Tuple[str], EventCacheEntry + ] = AsyncLruCache( cache_name="*getEvent*", max_size=hs.config.caches.event_cache_size, ) @@ -292,25 +294,6 @@ class EventsWorkerStore(SQLBaseStore): super().process_replication_rows(stream_name, instance_name, token, rows) - async def get_received_ts(self, event_id: str) -> Optional[int]: - """Get received_ts (when it was persisted) for the event. - - Raises an exception for unknown events. - - Args: - event_id: The event ID to query. - - Returns: - Timestamp in milliseconds, or None for events that were persisted - before received_ts was implemented. - """ - return await self.db_pool.simple_select_one_onecol( - table="events", - keyvalues={"event_id": event_id}, - retcol="received_ts", - desc="get_received_ts", - ) - async def have_censored_event(self, event_id: str) -> bool: """Check if an event has been censored, i.e. if the content of the event has been erased from the database due to a redaction. @@ -617,7 +600,7 @@ class EventsWorkerStore(SQLBaseStore): Returns: map from event id to result """ - event_entry_map = self._get_events_from_cache( + event_entry_map = await self._get_events_from_cache( event_ids, ) @@ -729,12 +712,46 @@ class EventsWorkerStore(SQLBaseStore): return event_entry_map - def _invalidate_get_event_cache(self, event_id: str) -> None: - self._get_event_cache.invalidate((event_id,)) + def invalidate_get_event_cache_after_txn( + self, txn: LoggingTransaction, event_id: str + ) -> None: + """ + Prepares a database transaction to invalidate the get event cache for a given + event ID when executed successfully. This is achieved by attaching two callbacks + to the transaction, one to invalidate the async cache and one for the in memory + sync cache (importantly called in that order). + + Arguments: + txn: the database transaction to attach the callbacks to + event_id: the event ID to be invalidated from caches + """ + + txn.async_call_after(self._invalidate_async_get_event_cache, event_id) + txn.call_after(self._invalidate_local_get_event_cache, event_id) + + async def _invalidate_async_get_event_cache(self, event_id: str) -> None: + """ + Invalidates an event in the asyncronous get event cache, which may be remote. + + Arguments: + event_id: the event ID to invalidate + """ + + await self._get_event_cache.invalidate((event_id,)) + + def _invalidate_local_get_event_cache(self, event_id: str) -> None: + """ + Invalidates an event in local in-memory get event caches. + + Arguments: + event_id: the event ID to invalidate + """ + + self._get_event_cache.invalidate_local((event_id,)) self._event_ref.pop(event_id, None) self._current_event_fetches.pop(event_id, None) - def _get_events_from_cache( + async def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True ) -> Dict[str, EventCacheEntry]: """Fetch events from the caches. @@ -749,7 +766,7 @@ class EventsWorkerStore(SQLBaseStore): for event_id in events: # First check if it's in the event cache - ret = self._get_event_cache.get( + ret = await self._get_event_cache.get( (event_id,), None, update_metrics=update_metrics ) if ret: @@ -771,7 +788,7 @@ class EventsWorkerStore(SQLBaseStore): # We add the entry back into the cache as we want to keep # recently queried events in the cache. - self._get_event_cache.set((event_id,), cache_entry) + await self._get_event_cache.set((event_id,), cache_entry) return event_map @@ -965,7 +982,13 @@ class EventsWorkerStore(SQLBaseStore): } row_dict = self.db_pool.new_transaction( - conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch + conn, + "do_fetch", + [], + [], + [], + self._fetch_event_rows, + events_to_fetch, ) # We only want to resolve deferreds from the main thread @@ -1148,7 +1171,7 @@ class EventsWorkerStore(SQLBaseStore): event=original_ev, redacted_event=redacted_event ) - self._get_event_cache.set((event_id,), cache_entry) + await self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry if not redacted_event: @@ -1382,7 +1405,9 @@ class EventsWorkerStore(SQLBaseStore): # if the event cache contains the event, obviously we've seen it. cache_results = { - (rid, eid) for (rid, eid) in keys if self._get_event_cache.contains((eid,)) + (rid, eid) + for (rid, eid) in keys + if await self._get_event_cache.contains((eid,)) } results = dict.fromkeys(cache_results, True) remaining = [k for k in keys if k not in cache_results] @@ -1465,7 +1490,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_new_forward_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: """Returns new events, for the Events replication stream Args: @@ -1481,10 +1506,11 @@ class EventsWorkerStore(SQLBaseStore): def get_all_new_forward_event_rows( txn: LoggingTransaction, - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," + " e.outlier" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events AS se USING (event_id)" @@ -1498,7 +1524,8 @@ class EventsWorkerStore(SQLBaseStore): ) txn.execute(sql, (last_id, current_id, instance_name, limit)) return cast( - List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + List[Tuple[int, str, str, str, str, str, str, str, bool, bool]], + txn.fetchall(), ) return await self.db_pool.runInteraction( @@ -1507,7 +1534,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_ex_outlier_stream_rows( self, instance_name: str, last_id: int, current_id: int - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: """Returns de-outliered events, for the Events replication stream Args: @@ -1522,11 +1549,14 @@ class EventsWorkerStore(SQLBaseStore): def get_ex_outlier_stream_rows_txn( txn: LoggingTransaction, - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," + " e.outlier" " FROM events AS e" + # NB: the next line (inner join) is what makes this query different from + # get_all_new_forward_event_rows. " INNER JOIN ex_outlier_stream AS out USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events AS se USING (event_id)" @@ -1541,7 +1571,8 @@ class EventsWorkerStore(SQLBaseStore): txn.execute(sql, (last_id, current_id, instance_name)) return cast( - List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + List[Tuple[int, str, str, str, str, str, str, str, bool, bool]], + txn.fetchall(), ) return await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index 9a63f953fb..efd136a864 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -66,6 +66,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore): "initialise_mau_threepids", [], [], + [], self._initialise_reserved_users, hs.config.server.mau_limits_reserved_threepids[: self._max_mau_value], ) diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 87b0d09039..f6822707e4 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -19,6 +19,8 @@ from synapse.api.errors import SynapseError from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main import CacheInvalidationWorkerStore from synapse.storage.databases.main.state import StateGroupWorkerStore +from synapse.storage.engines import PostgresEngine +from synapse.storage.engines._base import IsolationLevel from synapse.types import RoomStreamToken logger = logging.getLogger(__name__) @@ -302,7 +304,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): self._invalidate_cache_and_stream( txn, self.have_seen_event, (room_id, event_id) ) - self._invalidate_get_event_cache(event_id) + self.invalidate_get_event_cache_after_txn(txn, event_id) logger.info("[purge] done") @@ -317,11 +319,38 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): Returns: The list of state groups to delete. """ - return await self.db_pool.runInteraction( - "purge_room", self._purge_room_txn, room_id + + # This first runs the purge transaction with READ_COMMITTED isolation level, + # meaning any new rows in the tables will not trigger a serialization error. + # We then run the same purge a second time without this isolation level to + # purge any of those rows which were added during the first. + + state_groups_to_delete = await self.db_pool.runInteraction( + "purge_room", + self._purge_room_txn, + room_id=room_id, + isolation_level=IsolationLevel.READ_COMMITTED, + ) + + state_groups_to_delete.extend( + await self.db_pool.runInteraction( + "purge_room", + self._purge_room_txn, + room_id=room_id, + ), ) + return state_groups_to_delete + def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: + # This collides with event persistence so we cannot write new events and metadata into + # a room while deleting it or this transaction will fail. + if isinstance(self.database_engine, PostgresEngine): + txn.execute( + "SELECT room_version FROM rooms WHERE room_id = ? FOR UPDATE", + (room_id,), + ) + # First, fetch all the state groups that should be deleted, before # we delete that information. txn.execute( diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 86649c1e6c..768f95d16c 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -228,6 +228,7 @@ class PushRulesWorkerStore( iterable=user_ids, retcols=("*",), desc="bulk_get_push_rules", + batch_size=1000, ) rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index bec6d60577..0090c9f225 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -26,7 +26,7 @@ from typing import ( cast, ) -from synapse.api.constants import EduTypes, ReceiptTypes +from synapse.api.constants import EduTypes from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import ReceiptsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -682,17 +682,6 @@ class ReceiptsWorkerStore(SQLBaseStore): lock=False, ) - # When updating a local users read receipt, remove any push actions - # which resulted from the receipt's event and all earlier events. - if ( - self.hs.is_mine_id(user_id) - and receipt_type in (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE) - and stream_ordering is not None - ): - self._remove_old_push_actions_before_txn( # type: ignore[attr-defined] - txn, room_id=room_id, user_id=user_id, stream_ordering=stream_ordering - ) - return rx_ts def _graph_to_linear( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 5760d3428e..d6d485507b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -32,12 +32,17 @@ from typing import ( import attr -from synapse.api.constants import EventContentFields, EventTypes, JoinRules +from synapse.api.constants import ( + EventContentFields, + EventTypes, + JoinRules, + PublicRoomsFilterFields, +) from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.events import EventBase -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -170,7 +175,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): rooms.creator, state.encryption, state.is_federatable AS federatable, rooms.is_public AS public, state.join_rules, state.guest_access, state.history_visibility, curr.current_state_events AS state_events, - state.avatar, state.topic + state.avatar, state.topic, state.room_type FROM rooms LEFT JOIN room_stats_state state USING (room_id) LEFT JOIN room_stats_current curr USING (room_id) @@ -199,10 +204,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): desc="get_public_room_ids", ) + def _construct_room_type_where_clause( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[Union[str, None], List[str]]: + if not room_types or not self.config.experimental.msc3827_enabled: + return None, [] + else: + # We use None when we want get rooms without a type + is_null_clause = "" + if None in room_types: + is_null_clause = "OR room_type IS NULL" + room_types = [value for value in room_types if value is not None] + + list_clause, args = make_in_list_sql_clause( + self.database_engine, "room_type", room_types + ) + + return f"({list_clause} {is_null_clause})", args + async def count_public_rooms( self, network_tuple: Optional[ThirdPartyInstanceID], ignore_non_federatable: bool, + search_filter: Optional[dict], ) -> int: """Counts the number of public rooms as tracked in the room_stats_current and room_stats_state table. @@ -210,11 +234,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): Args: network_tuple ignore_non_federatable: If true filters out non-federatable rooms + search_filter """ def _count_public_rooms_txn(txn: LoggingTransaction) -> int: query_args = [] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + room_type_clause = f" AND {room_type_clause}" if room_type_clause else "" + query_args += args + if network_tuple: if network_tuple.appservice_id: published_sql = """ @@ -249,6 +282,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) + {room_type_clause} AND joined_members > 0 """ @@ -347,8 +381,12 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): if ignore_non_federatable: where_clauses.append("is_federatable") - if search_filter and search_filter.get("generic_search_term", None): - search_term = "%" + search_filter["generic_search_term"] + "%" + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + search_term = ( + "%" + search_filter[PublicRoomsFilterFields.GENERIC_SEARCH_TERM] + "%" + ) where_clauses.append( """ @@ -365,6 +403,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): search_term.lower(), ] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + if room_type_clause: + where_clauses.append(room_type_clause) + query_args += args + where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) @@ -373,7 +420,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): sql = f""" SELECT room_id, name, topic, canonical_alias, joined_members, - avatar, history_visibility, guest_access, join_rules + avatar, history_visibility, guest_access, join_rules, room_type FROM ( {published_sql} ) published @@ -549,7 +596,8 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): SELECT state.room_id, state.name, state.canonical_alias, curr.joined_members, curr.local_users_in_room, rooms.room_version, rooms.creator, state.encryption, state.is_federatable, rooms.is_public, state.join_rules, - state.guest_access, state.history_visibility, curr.current_state_events + state.guest_access, state.history_visibility, curr.current_state_events, + state.room_type FROM room_stats_state state INNER JOIN room_stats_current curr USING (room_id) INNER JOIN rooms USING (room_id) @@ -599,6 +647,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): "guest_access": room[11], "history_visibility": room[12], "state_events": room[13], + "room_type": room[14], } ) @@ -1109,19 +1158,25 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): return room_servers async def clear_partial_state_room(self, room_id: str) -> bool: - # this can race with incoming events, so we watch out for FK errors. - # TODO(faster_joins): this still doesn't completely fix the race, since the persist process - # is not atomic. I fear we need an application-level lock. - # https://github.com/matrix-org/synapse/issues/12988 + """Clears the partial state flag for a room. + + Args: + room_id: The room whose partial state flag is to be cleared. + + Returns: + `True` if the partial state flag has been cleared successfully. + + `False` if the partial state flag could not be cleared because the room + still contains events with partial state. + """ try: await self.db_pool.runInteraction( "clear_partial_state_room", self._clear_partial_state_room_txn, room_id ) return True - except self.db_pool.engine.module.DatabaseError as e: - # TODO(faster_joins): how do we distinguish between FK errors and other errors? - # https://github.com/matrix-org/synapse/issues/12988 - logger.warning( + except self.db_pool.engine.module.IntegrityError as e: + # Assume that any `IntegrityError`s are due to partial state events. + logger.info( "Exception while clearing lazy partial-state-room %s, retrying: %s", room_id, e, @@ -1166,6 +1221,7 @@ class _BackgroundUpdates: POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" + ADD_ROOM_TYPE_COLUMN = "add_room_type_column" _REPLACE_ROOM_DEPTH_SQL_COMMANDS = ( @@ -1200,6 +1256,11 @@ class RoomBackgroundUpdateStore(SQLBaseStore): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + self._background_add_room_type_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1569,6 +1630,69 @@ class RoomBackgroundUpdateStore(SQLBaseStore): return batch_size + async def _background_add_room_type_column( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to go and add room_type information to `room_stats_state` + table from `event_json` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_add_room_type_column_txn( + txn: LoggingTransaction, + ) -> bool: + sql = """ + SELECT state.room_id, json FROM event_json + INNER JOIN current_state_events AS state USING (event_id) + WHERE state.room_id > ? AND type = 'm.room.create' + ORDER BY state.room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + room_id_to_create_event_results = txn.fetchall() + + new_last_room_id = None + for room_id, event_json in room_id_to_create_event_results: + event_dict = db_to_json(event_json) + + room_type = event_dict.get("content", {}).get( + EventContentFields.ROOM_TYPE, None + ) + if isinstance(room_type, str): + self.db_pool.simple_update_txn( + txn, + table="room_stats_state", + keyvalues={"room_id": room_id}, + updatevalues={"room_type": room_type}, + ) + + new_last_room_id = room_id + + if new_last_room_id is None: + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_add_room_type_column", + _background_add_room_type_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN + ) + + return batch_size + class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): def __init__( diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 31bc8c5601..e2cccc688c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -21,6 +21,7 @@ from typing import ( FrozenSet, Iterable, List, + Mapping, Optional, Set, Tuple, @@ -31,7 +32,6 @@ import attr from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase -from synapse.events.snapshot import EventContext from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import ( run_as_background_process, @@ -56,6 +56,7 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList +from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure if TYPE_CHECKING: @@ -184,7 +185,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._check_safe_current_state_events_membership_updated_txn, ) - @cached(max_entries=100000, iterable=True, prune_unread_entries=False) + @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id @@ -212,6 +213,60 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] + @cached() + def get_user_in_room_with_profile( + self, room_id: str, user_id: str + ) -> Dict[str, ProfileInfo]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_user_in_room_with_profile", list_name="user_ids" + ) + async def get_subset_users_in_room_with_profiles( + self, room_id: str, user_ids: Collection[str] + ) -> Dict[str, ProfileInfo]: + """Get a mapping from user ID to profile information for a list of users + in a given room. + + The profile information comes directly from this room's `m.room.member` + events, and so may be specific to this room rather than part of a user's + global profile. To avoid privacy leaks, the profile data should only be + revealed to users who are already in this room. + + Args: + room_id: The ID of the room to retrieve the users of. + user_ids: a list of users in the room to run the query for + + Returns: + A mapping from user ID to ProfileInfo. + """ + + def _get_subset_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: + clause, ids = make_in_list_sql_clause( + self.database_engine, "c.state_key", user_ids + ) + + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s + """ % ( + clause, + ) + txn.execute(sql, (room_id, Membership.JOIN, *ids)) + + return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} + + return await self.db_pool.runInteraction( + "get_subset_users_in_room_with_profiles", + _get_subset_users_in_room_with_profiles, + ) + @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( self, room_id: str @@ -338,6 +393,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @cached() + async def get_number_joined_users_in_room(self, room_id: str) -> int: + return await self.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="COUNT(*)", + desc="get_number_joined_users_in_room", + ) + + @cached() async def get_invited_rooms_for_local_user( self, user_id: str ) -> List[RoomsForUser]: @@ -416,6 +480,17 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: + """Get all the rooms for this *local* user where the membership for this user + matches one in the membership list. + + Args: + user_id: The user ID. + membership_list: A list of synapse.api.constants.Membership + values which the user must be in. + + Returns: + The RoomsForUser that the user matches the membership types. + """ # Paranoia check. if not self.hs.is_mine_id(user_id): raise Exception( @@ -444,6 +519,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results + @cached(iterable=True) + async def get_local_users_in_room(self, room_id: str) -> List[str]: + """ + Retrieves a list of the current roommembers who are local to the server. + """ + return await self.db_pool.simple_select_onecol( + table="local_current_membership", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="user_id", + desc="get_local_users_in_room", + ) + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: @@ -476,7 +563,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results_dict.get("membership"), results_dict.get("event_id") - @cached(max_entries=500000, iterable=True, prune_unread_entries=False) + @cached(max_entries=500000, iterable=True) async def get_rooms_for_user_with_stream_ordering( self, user_id: str ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]: @@ -647,25 +734,76 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) return frozenset(r.room_id for r in rooms) - @cached( - max_entries=500000, - cache_context=True, - iterable=True, - prune_unread_entries=False, + @cached(max_entries=10000) + async def does_pair_of_users_share_a_room( + self, user_id: str, other_user_id: str + ) -> bool: + raise NotImplementedError() + + @cachedList( + cached_method_name="does_pair_of_users_share_a_room", list_name="other_user_ids" ) - async def get_users_who_share_room_with_user( - self, user_id: str, cache_context: _CacheContext + async def _do_users_share_a_room( + self, user_id: str, other_user_ids: Collection[str] + ) -> Mapping[str, Optional[bool]]: + """Return mapping from user ID to whether they share a room with the + given user. + + Note: `None` and `False` are equivalent and mean they don't share a + room. + """ + + def do_users_share_a_room_txn( + txn: LoggingTransaction, user_ids: Collection[str] + ) -> Dict[str, bool]: + clause, args = make_in_list_sql_clause( + self.database_engine, "state_key", user_ids + ) + + # This query works by fetching both the list of rooms for the target + # user and the set of other users, and then checking if there is any + # overlap. + sql = f""" + SELECT b.state_key + FROM ( + SELECT room_id FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ? + ) AS a + INNER JOIN ( + SELECT room_id, state_key FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join' AND {clause} + ) AS b using (room_id) + LIMIT 1 + """ + + txn.execute(sql, (user_id, *args)) + return {u: True for u, in txn} + + to_return = {} + for batch_user_ids in batch_iter(other_user_ids, 1000): + res = await self.db_pool.runInteraction( + "do_users_share_a_room", do_users_share_a_room_txn, batch_user_ids + ) + to_return.update(res) + + return to_return + + async def do_users_share_a_room( + self, user_id: str, other_user_ids: Collection[str] ) -> Set[str]: + """Return the set of users who share a room with the first users""" + + user_dict = await self._do_users_share_a_room(user_id, other_user_ids) + + return {u for u, share_room in user_dict.items() if share_room} + + async def get_users_who_share_room_with_user(self, user_id: str) -> Set[str]: """Returns the set of users who share a room with `user_id`""" - room_ids = await self.get_rooms_for_user( - user_id, on_invalidate=cache_context.invalidate - ) + room_ids = await self.get_rooms_for_user(user_id) user_who_share_room = set() for room_id in room_ids: - user_ids = await self.get_users_in_room( - room_id, on_invalidate=cache_context.invalidate - ) + user_ids = await self.get_users_in_room(room_id) user_who_share_room.update(user_ids) return user_who_share_room @@ -694,26 +832,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() - async def get_joined_users_from_context( - self, event: EventBase, context: EventContext - ) -> Dict[str, ProfileInfo]: - state_group: Union[object, int] = context.state_group - if not state_group: - # If state_group is None it means it has yet to be assigned a - # state group, i.e. we need to make sure that calls with a state_group - # of None don't hit previous cached calls with a None state_group. - # To do this we set the state_group to a new object as object() != object() - state_group = object() - - current_state_ids = await context.get_current_state_ids() - assert current_state_ids is not None - assert state_group is not None - return await self._get_joined_users_from_context( - event.room_id, state_group, current_state_ids, event=event, context=context - ) - async def get_joined_users_from_state( - self, room_id: str, state_entry: "_StateCacheEntry" + self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" ) -> Dict[str, ProfileInfo]: state_group: Union[object, int] = state_entry.state_group if not state_group: @@ -726,18 +846,17 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): return await self._get_joined_users_from_context( - room_id, state_group, state_entry.state, context=state_entry + room_id, state_group, state, context=state_entry ) - @cached(num_args=2, cache_context=True, iterable=True, max_entries=100000) + @cached(num_args=2, iterable=True, max_entries=100000) async def _get_joined_users_from_context( self, room_id: str, state_group: Union[object, int], current_state_ids: StateMap[str], - cache_context: _CacheContext, event: Optional[EventBase] = None, - context: Optional[Union[EventContext, "_StateCacheEntry"]] = None, + context: Optional["_StateCacheEntry"] = None, ) -> Dict[str, ProfileInfo]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states @@ -777,7 +896,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We don't update the event cache hit ratio as it completely throws off # the hit ratio counts. After all, we don't populate the cache if we # miss it here - event_map = self._get_events_from_cache(member_event_ids, update_metrics=False) + event_map = await self._get_events_from_cache( + member_event_ids, update_metrics=False + ) missing_member_event_ids = [] for event_id in member_event_ids: @@ -836,7 +957,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): iterable=event_ids, retcols=("user_id", "display_name", "avatar_url", "event_id"), keyvalues={"membership": Membership.JOIN}, - batch_size=500, + batch_size=1000, desc="_get_joined_profiles_from_event_ids", ) @@ -931,7 +1052,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) async def get_joined_hosts( - self, room_id: str, state_entry: "_StateCacheEntry" + self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" ) -> FrozenSet[str]: state_group: Union[object, int] = state_entry.state_group if not state_group: @@ -944,7 +1065,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_hosts"): return await self._get_joined_hosts( - room_id, state_group, state_entry=state_entry + room_id, state_group, state, state_entry=state_entry ) @cached(num_args=2, max_entries=10000, iterable=True) @@ -952,6 +1073,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): self, room_id: str, state_group: Union[object, int], + state: StateMap[str], state_entry: "_StateCacheEntry", ) -> FrozenSet[str]: # We don't use `state_group`, it's there so that we can cache based on @@ -1007,7 +1129,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. joined_users = await self.get_joined_users_from_state( - room_id, state_entry + room_id, state, state_entry ) cache.hosts_to_joined_users = {} diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 538451b05f..b4c652acf3 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,7 +16,7 @@ import logging from enum import Enum from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from typing_extensions import Counter @@ -238,6 +238,7 @@ class StatsStore(StateDeltasStore): * avatar * canonical_alias * guest_access + * room_type A is_federatable key can also be included with a boolean value. @@ -263,6 +264,7 @@ class StatsStore(StateDeltasStore): "avatar", "canonical_alias", "guest_access", + "room_type", ): field = fields.get(col, sentinel) if field is not sentinel and (not isinstance(field, str) or "\0" in field): @@ -295,6 +297,7 @@ class StatsStore(StateDeltasStore): keyvalues={id_col: id}, retcol="completed_delta_stream_id", allow_none=True, + desc="get_earliest_token_for_stats", ) async def bulk_update_stats_delta( @@ -571,7 +574,7 @@ class StatsStore(StateDeltasStore): state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined] - room_state = { + room_state: Dict[str, Union[None, bool, str]] = { "join_rules": None, "history_visibility": None, "encryption": None, @@ -580,6 +583,7 @@ class StatsStore(StateDeltasStore): "avatar": None, "canonical_alias": None, "is_federatable": True, + "room_type": None, } for event in state_event_map.values(): @@ -603,6 +607,9 @@ class StatsStore(StateDeltasStore): room_state["is_federatable"] = ( event.content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event.content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type await self.update_room_state(room_id, room_state) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 8e88784d3c..2590b52f73 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -46,10 +46,12 @@ from typing import ( Set, Tuple, cast, + overload, ) import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet import defer @@ -795,6 +797,24 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) return RoomStreamToken(topo, stream_ordering) + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: Literal[False] = False, + ) -> int: + ... + + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: bool = False, + ) -> Optional[int]: + ... + def get_stream_id_for_event_txn( self, txn: LoggingTransaction, @@ -1002,8 +1022,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): } async def get_all_new_events_stream( - self, from_id: int, current_id: int, limit: int - ) -> Tuple[int, List[EventBase]]: + self, from_id: int, current_id: int, limit: int, get_prev_content: bool = False + ) -> Tuple[int, List[EventBase], Dict[str, Optional[int]]]: """Get all new events Returns all events with from_id < stream_ordering <= current_id. @@ -1012,19 +1032,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): from_id: the stream_ordering of the last event we processed current_id: the stream_ordering of the most recently processed event limit: the maximum number of events to return + get_prev_content: whether to fetch previous event content Returns: - A tuple of (next_id, events), where `next_id` is the next value to - pass as `from_id` (it will either be the stream_ordering of the - last returned event, or, if fewer than `limit` events were found, - the `current_id`). + A tuple of (next_id, events, event_to_received_ts), where `next_id` + is the next value to pass as `from_id` (it will either be the + stream_ordering of the last returned event, or, if fewer than `limit` + events were found, the `current_id`). The `event_to_received_ts` is + a dictionary mapping event ID to the event `received_ts`. """ def get_all_new_events_stream_txn( txn: LoggingTransaction, - ) -> Tuple[int, List[str]]: + ) -> Tuple[int, Dict[str, Optional[int]]]: sql = ( - "SELECT e.stream_ordering, e.event_id" + "SELECT e.stream_ordering, e.event_id, e.received_ts" " FROM events AS e" " WHERE" " ? < e.stream_ordering AND e.stream_ordering <= ?" @@ -1039,15 +1061,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if len(rows) == limit: upper_bound = rows[-1][0] - return upper_bound, [row[1] for row in rows] + event_to_received_ts: Dict[str, Optional[int]] = { + row[1]: row[2] for row in rows + } + return upper_bound, event_to_received_ts - upper_bound, event_ids = await self.db_pool.runInteraction( + upper_bound, event_to_received_ts = await self.db_pool.runInteraction( "get_all_new_events_stream", get_all_new_events_stream_txn ) - events = await self.get_events_as_list(event_ids) + events = await self.get_events_as_list( + event_to_received_ts.keys(), + get_prev_content=get_prev_content, + ) - return upper_bound, events + return upper_bound, events, event_to_received_ts async def get_federation_out_pos(self, typ: str) -> int: if self._need_to_reset_federation_stream_positions: diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index fa9eadaca7..a7fcc564a9 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -24,6 +24,7 @@ from synapse.storage.database import ( from synapse.storage.engines import PostgresEngine from synapse.storage.state import StateFilter from synapse.types import MutableStateMap, StateMap +from synapse.util.caches import intern_string if TYPE_CHECKING: from synapse.server import HomeServer @@ -136,7 +137,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): txn.execute(sql % (where_clause,), args) for row in txn: typ, state_key, event_id = row - key = (typ, state_key) + key = (intern_string(typ), intern_string(state_key)) results[group][key] = event_id else: max_entries_returned = state_filter.max_entries_returned() diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 609a2b88bf..bb64543c1f 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -202,7 +202,14 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): requests state from the cache, if False we need to query the DB for the missing state. """ - cache_entry = cache.get(group) + # If we are asked explicitly for a subset of keys, we only ask for those + # from the cache. This ensures that the `DictionaryCache` can make + # better decisions about what to cache and what to expire. + dict_keys = None + if not state_filter.has_wildcards(): + dict_keys = state_filter.concrete_types() + + cache_entry = cache.get(group, dict_keys=dict_keys) state_dict_ids = cache_entry.value if cache_entry.full or state_filter.is_full(): @@ -400,14 +407,17 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): room_id: str, prev_group: Optional[int], delta_ids: Optional[StateMap[str]], - current_state_ids: StateMap[str], + current_state_ids: Optional[StateMap[str]], ) -> int: """Store a new set of state, returning a newly assigned state group. + At least one of `current_state_ids` and `prev_group` must be provided. Whenever + `prev_group` is not None, `delta_ids` must also not be None. + Args: event_id: The event ID for which the state was calculated room_id - prev_group: A previous state group for the room, optional. + prev_group: A previous state group for the room. delta_ids: The delta between state at `prev_group` and `current_state_ids`, if `prev_group` was given. Same format as `current_state_ids`. @@ -418,10 +428,41 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): The state group ID """ - def _store_state_group_txn(txn: LoggingTransaction) -> int: - if current_state_ids is None: - # AFAIK, this can never happen - raise Exception("current_state_ids cannot be None") + if prev_group is None and current_state_ids is None: + raise Exception("current_state_ids and prev_group can't both be None") + + if prev_group is not None and delta_ids is None: + raise Exception("delta_ids is None when prev_group is not None") + + def insert_delta_group_txn( + txn: LoggingTransaction, prev_group: int, delta_ids: StateMap[str] + ) -> Optional[int]: + """Try and persist the new group as a delta. + + Requires that we have the state as a delta from a previous state group. + + Returns: + The state group if successfully created, or None if the state + needs to be persisted as a full state. + """ + is_in_db = self.db_pool.simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + # if the chain of state group deltas is going too long, we fall back to + # persisting a complete state group. + potential_hops = self._count_state_group_hops_txn(txn, prev_group) + if potential_hops >= MAX_STATE_DELTA_HOPS: + return None state_group = self._state_group_seq_gen.get_next_id_txn(txn) @@ -431,51 +472,45 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): values={"id": state_group, "room_id": room_id, "event_id": event_id}, ) - # We persist as a delta if we can, while also ensuring the chain - # of deltas isn't tooo long, as otherwise read performance degrades. - if prev_group: - is_in_db = self.db_pool.simple_select_one_onecol_txn( - txn, - table="state_groups", - keyvalues={"id": prev_group}, - retcol="id", - allow_none=True, - ) - if not is_in_db: - raise Exception( - "Trying to persist state with unpersisted prev_group: %r" - % (prev_group,) - ) - - potential_hops = self._count_state_group_hops_txn(txn, prev_group) - if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: - assert delta_ids is not None - - self.db_pool.simple_insert_txn( - txn, - table="state_group_edges", - values={"state_group": state_group, "prev_state_group": prev_group}, - ) + self.db_pool.simple_insert_txn( + txn, + table="state_group_edges", + values={"state_group": state_group, "prev_state_group": prev_group}, + ) - self.db_pool.simple_insert_many_txn( - txn, - table="state_groups_state", - keys=("state_group", "room_id", "type", "state_key", "event_id"), - values=[ - (state_group, room_id, key[0], key[1], state_id) - for key, state_id in delta_ids.items() - ], - ) - else: - self.db_pool.simple_insert_many_txn( - txn, - table="state_groups_state", - keys=("state_group", "room_id", "type", "state_key", "event_id"), - values=[ - (state_group, room_id, key[0], key[1], state_id) - for key, state_id in current_state_ids.items() - ], - ) + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + (state_group, room_id, key[0], key[1], state_id) + for key, state_id in delta_ids.items() + ], + ) + + return state_group + + def insert_full_state_txn( + txn: LoggingTransaction, current_state_ids: StateMap[str] + ) -> int: + """Persist the full state, returning the new state group.""" + state_group = self._state_group_seq_gen.get_next_id_txn(txn) + + self.db_pool.simple_insert_txn( + txn, + table="state_groups", + values={"id": state_group, "room_id": room_id, "event_id": event_id}, + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + (state_group, room_id, key[0], key[1], state_id) + for key, state_id in current_state_ids.items() + ], + ) # Prefill the state group caches with this group. # It's fine to use the sequence like this as the state group map @@ -491,7 +526,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self._state_group_members_cache.update, self._state_group_members_cache.sequence, key=state_group, - value=dict(current_member_state_ids), + value=current_member_state_ids, ) current_non_member_state_ids = { @@ -503,13 +538,35 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self._state_group_cache.update, self._state_group_cache.sequence, key=state_group, - value=dict(current_non_member_state_ids), + value=current_non_member_state_ids, ) return state_group + if prev_group is not None: + state_group = await self.db_pool.runInteraction( + "store_state_group.insert_delta_group", + insert_delta_group_txn, + prev_group, + delta_ids, + ) + if state_group is not None: + return state_group + + # We're going to persist the state as a complete group rather than + # a delta, so first we need to ensure we have loaded the state map + # from the database. + if current_state_ids is None: + assert prev_group is not None + assert delta_ids is not None + groups = await self._get_state_for_groups([prev_group]) + current_state_ids = dict(groups[prev_group]) + current_state_ids.update(delta_ids) + return await self.db_pool.runInteraction( - "store_state_group", _store_state_group_txn + "store_state_group.insert_full_state", + insert_full_state_txn, + current_state_ids, ) async def purge_unreferenced_state_groups( diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index dc237e3032..a9a88c8bfd 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -74,13 +74,14 @@ Changes in SCHEMA_VERSION = 71: Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. + - Tables related to groups are dropped. """ SCHEMA_COMPAT_VERSION = ( - # We no longer maintain `event_edges.room_id`, so synapses with SCHEMA_VERSION < 71 - # will break. - 71 + # The groups tables are no longer accessible, so synapses with SCHEMA_VERSION < 72 + # could break. + 72 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql new file mode 100644 index 0000000000..d5e0765471 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE room_stats_state ADD room_type TEXT; + +INSERT INTO background_updates (update_name, progress_json) + VALUES ('add_room_type_column', '{}'); diff --git a/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql b/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql new file mode 100644 index 0000000000..e45db61529 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql @@ -0,0 +1,35 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column that records the position of the read receipt for the user at +-- the time we summarised the push actions. This is used to check if the counts +-- are up to date after a new read receipt has been sent. +-- +-- Null means that we can skip that check, as the row was written by an older +-- version of Synapse that updated `event_push_summary` synchronously when +-- persisting a new read receipt +ALTER TABLE event_push_summary ADD COLUMN last_receipt_stream_ordering BIGINT; + + +-- Tracks which new receipts we've handled +CREATE TABLE event_push_summary_last_receipt_stream_id ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO event_push_summary_last_receipt_stream_id (stream_id) + SELECT COALESCE(MAX(stream_id), 0) + FROM receipts_linearized; diff --git a/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql b/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql new file mode 100644 index 0000000000..cd0c11d951 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add an index to `event_push_actions` to make deleting old non-highlight push +-- actions faster. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7202, 'event_push_actions_stream_highlight_index', '{}'); diff --git a/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py new file mode 100644 index 0000000000..55a5d092cc --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py @@ -0,0 +1,47 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from synapse.storage.types import Cursor + + +def run_create(cur: Cursor, database_engine, *args, **kwargs): + """Add a bg update to populate the `state_key` and `rejection_reason` columns of `events`""" + + # we know that any new events will have the columns populated (and that has been + # the case since schema_version 68, so there is no chance of rolling back now). + # + # So, we only need to make sure that existing rows are updated. We read the + # current min and max stream orderings, since that is guaranteed to include all + # the events that were stored before the new columns were added. + cur.execute("SELECT MIN(stream_ordering), MAX(stream_ordering) FROM events") + (min_stream_ordering, max_stream_ordering) = cur.fetchone() + + if min_stream_ordering is None: + # no rows, nothing to do. + return + + cur.execute( + "INSERT into background_updates (ordering, update_name, progress_json)" + " VALUES (7203, 'events_populate_state_key_rejections', ?)", + ( + json.dumps( + { + "min_stream_ordering_exclusive": min_stream_ordering - 1, + "max_stream_ordering_inclusive": max_stream_ordering, + } + ), + ), + ) diff --git a/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql b/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql new file mode 100644 index 0000000000..0da668aa3a --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- event_reference_hashes is unused, so we can drop it +DROP TABLE event_reference_hashes; diff --git a/synapse/storage/schema/main/delta/72/03remove_groups.sql b/synapse/storage/schema/main/delta/72/03remove_groups.sql new file mode 100644 index 0000000000..b7c5894de8 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03remove_groups.sql @@ -0,0 +1,31 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Remove the tables which powered the unspecced groups/communities feature. +DROP TABLE IF EXISTS group_attestations_remote; +DROP TABLE IF EXISTS group_attestations_renewals; +DROP TABLE IF EXISTS group_invites; +DROP TABLE IF EXISTS group_roles; +DROP TABLE IF EXISTS group_room_categories; +DROP TABLE IF EXISTS group_rooms; +DROP TABLE IF EXISTS group_summary_roles; +DROP TABLE IF EXISTS group_summary_room_categories; +DROP TABLE IF EXISTS group_summary_rooms; +DROP TABLE IF EXISTS group_summary_users; +DROP TABLE IF EXISTS group_users; +DROP TABLE IF EXISTS groups; +DROP TABLE IF EXISTS local_group_membership; +DROP TABLE IF EXISTS local_group_updates; +DROP TABLE IF EXISTS remote_profile_cache; diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index 211437cfaa..466e5137f2 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -166,6 +166,7 @@ class PartialCurrentStateTracker: logger.info( "Awaiting un-partial-stating of room %s", room_id, + stack_info=True, ) await make_deferred_yieldable(d) diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 6323d452e7..a90f08dd4c 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -20,6 +20,7 @@ from typing import Any, Callable, Dict, Generator, Optional import attr from frozendict import frozendict from matrix_common.versionstring import get_distribution_version_string +from typing_extensions import ParamSpec from twisted.internet import defer, task from twisted.internet.defer import Deferred @@ -82,6 +83,9 @@ def unwrapFirstError(failure: Failure) -> Failure: return failure.value.subFailure # type: ignore[union-attr] # Issue in Twisted's annotations +P = ParamSpec("P") + + @attr.s(slots=True) class Clock: """ @@ -110,7 +114,7 @@ class Clock: return int(self.time() * 1000) def looping_call( - self, f: Callable, msec: float, *args: Any, **kwargs: Any + self, f: Callable[P, object], msec: float, *args: P.args, **kwargs: P.kwargs ) -> LoopingCall: """Call a function repeatedly. diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index d267703df0..fa91479c97 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -14,11 +14,13 @@ import enum import logging import threading -from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar +from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union import attr +from typing_extensions import Literal from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.treecache import TreeCache logger = logging.getLogger(__name__) @@ -33,10 +35,12 @@ DV = TypeVar("DV") # This class can't be generic because it uses slots with attrs. # See: https://github.com/python-attrs/attrs/issues/313 -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class DictionaryEntry: # should be: Generic[DKT, DV]. """Returned when getting an entry from the cache + If `full` is true then `known_absent` will be the empty set. + Attributes: full: Whether the cache has the full or dict or just some keys. If not full then not all requested keys will necessarily be present @@ -53,20 +57,90 @@ class DictionaryEntry: # should be: Generic[DKT, DV]. return len(self.value) +class _FullCacheKey(enum.Enum): + """The key we use to cache the full dict.""" + + KEY = object() + + class _Sentinel(enum.Enum): # defining a sentinel in this way allows mypy to correctly handle the # type of a dictionary lookup. sentinel = object() +class _PerKeyValue(Generic[DV]): + """The cached value of a dictionary key. If `value` is the sentinel, + indicates that the requested key is known to *not* be in the full dict. + """ + + __slots__ = ["value"] + + def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None: + self.value = value + + def __len__(self) -> int: + # We add a `__len__` implementation as we use this class in a cache + # where the values are variable length. + return 1 + + class DictionaryCache(Generic[KT, DKT, DV]): """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. + + This cache has two levels of key. First there is the "cache key" (of type + `KT`), which maps to a dict. The keys to that dict are the "dict key" (of + type `DKT`). The overall structure is therefore `KT->DKT->DV`. For + example, it might look like: + + { + 1: { 1: "a", 2: "b" }, + 2: { 1: "c" }, + } + + It is possible to look up either individual dict keys, or the *complete* + dict for a given cache key. + + Each dict item, and the complete dict is treated as a separate LRU + entry for the purpose of cache expiry. For example, given: + dict_cache.get(1, None) -> DictionaryEntry({1: "a", 2: "b"}) + dict_cache.get(1, [1]) -> DictionaryEntry({1: "a"}) + dict_cache.get(1, [2]) -> DictionaryEntry({2: "b"}) + + ... then the cache entry for the complete dict will expire first, + followed by the cache entry for the '1' dict key, and finally that + for the '2' dict key. """ def __init__(self, name: str, max_entries: int = 1000): - self.cache: LruCache[KT, DictionaryEntry] = LruCache( - max_size=max_entries, cache_name=name, size_callback=len + # We use a single LruCache to store two different types of entries: + # 1. Map from (key, dict_key) -> dict value (or sentinel, indicating + # the key doesn't exist in the dict); and + # 2. Map from (key, _FullCacheKey.KEY) -> full dict. + # + # The former is used when explicit keys of the dictionary are looked up, + # and the latter when the full dictionary is requested. + # + # If when explicit keys are requested and not in the cache, we then look + # to see if we have the full dict and use that if we do. If found in the + # full dict each key is added into the cache. + # + # This set up allows the `LruCache` to prune the full dict entries if + # they haven't been used in a while, even when there have been recent + # queries for subsets of the dict. + # + # Typing: + # * A key of `(KT, DKT)` has a value of `_PerKeyValue` + # * A key of `(KT, _FullCacheKey.KEY)` has a value of `Dict[DKT, DV]` + self.cache: LruCache[ + Tuple[KT, Union[DKT, Literal[_FullCacheKey.KEY]]], + Union[_PerKeyValue, Dict[DKT, DV]], + ] = LruCache( + max_size=max_entries, + cache_name=name, + cache_type=TreeCache, + size_callback=len, ) self.name = name @@ -91,23 +165,83 @@ class DictionaryCache(Generic[KT, DKT, DV]): Args: key dict_keys: If given a set of keys then return only those keys - that exist in the cache. + that exist in the cache. If None then returns the full dict + if it is in the cache. Returns: - DictionaryEntry + DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry` + will contain include the keys that are in the cache. If None then + will either return the full dict if in the cache, or the empty + dict (with `full` set to False) if it isn't. """ - entry = self.cache.get(key, _Sentinel.sentinel) - if entry is not _Sentinel.sentinel: - if dict_keys is None: - return DictionaryEntry( - entry.full, entry.known_absent, dict(entry.value) - ) + if dict_keys is None: + # The caller wants the full set of dictionary keys for this cache key + return self._get_full_dict(key) + + # We are being asked for a subset of keys. + + # First go and check for each requested dict key in the cache, tracking + # which we couldn't find. + values = {} + known_absent = set() + missing = [] + for dict_key in dict_keys: + entry = self.cache.get((key, dict_key), _Sentinel.sentinel) + if entry is _Sentinel.sentinel: + missing.append(dict_key) + continue + + assert isinstance(entry, _PerKeyValue) + + if entry.value is _Sentinel.sentinel: + known_absent.add(dict_key) else: - return DictionaryEntry( - entry.full, - entry.known_absent, - {k: entry.value[k] for k in dict_keys if k in entry.value}, - ) + values[dict_key] = entry.value + + # If we found everything we can return immediately. + if not missing: + return DictionaryEntry(False, known_absent, values) + + # We are missing some keys, so check if we happen to have the full dict in + # the cache. + # + # We don't update the last access time for this cache fetch, as we + # aren't explicitly interested in the full dict and so we don't want + # requests for explicit dict keys to keep the full dict in the cache. + entry = self.cache.get( + (key, _FullCacheKey.KEY), + _Sentinel.sentinel, + update_last_access=False, + ) + if entry is _Sentinel.sentinel: + # Not in the cache, return the subset of keys we found. + return DictionaryEntry(False, known_absent, values) + + # We have the full dict! + assert isinstance(entry, dict) + + for dict_key in missing: + # We explicitly add each dict key to the cache, so that cache hit + # rates and LRU times for each key can be tracked separately. + value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] + self.cache[(key, dict_key)] = _PerKeyValue(value) + + if value is not _Sentinel.sentinel: + values[dict_key] = value + + return DictionaryEntry(True, set(), values) + + def _get_full_dict( + self, + key: KT, + ) -> DictionaryEntry: + """Fetch the full dict for the given key.""" + + # First we check if we have cached the full dict. + entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) + if entry is not _Sentinel.sentinel: + assert isinstance(entry, dict) + return DictionaryEntry(True, set(), entry) return DictionaryEntry(False, set(), {}) @@ -117,7 +251,13 @@ class DictionaryCache(Generic[KT, DKT, DV]): # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 - self.cache.pop(key, None) + + # We want to drop all information about the dict for the given key, so + # we use `del_multi` to delete it all in one go. + # + # We ignore the type error here: `del_multi` accepts a truncated key + # (when the key type is a tuple). + self.cache.del_multi((key,)) # type: ignore[arg-type] def invalidate_all(self) -> None: self.check_thread() @@ -131,7 +271,16 @@ class DictionaryCache(Generic[KT, DKT, DV]): value: Dict[DKT, DV], fetched_keys: Optional[Iterable[DKT]] = None, ) -> None: - """Updates the entry in the cache + """Updates the entry in the cache. + + Note: This does *not* invalidate any existing entries for the `key`. + In particular, if we add an entry for the cached "full dict" with + `fetched_keys=None`, existing entries for individual dict keys are + not invalidated. Likewise, adding entries for individual keys does + not invalidate any cached value for the full dict. + + In other words: if the underlying data is *changed*, the cache must + be explicitly invalidated via `.invalidate()`. Args: sequence @@ -149,20 +298,27 @@ class DictionaryCache(Generic[KT, DKT, DV]): # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) if fetched_keys is None: - self._insert(key, value, set()) + self.cache[(key, _FullCacheKey.KEY)] = value else: - self._update_or_insert(key, value, fetched_keys) + self._update_subset(key, value, fetched_keys) - def _update_or_insert( - self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT] + def _update_subset( + self, key: KT, value: Dict[DKT, DV], fetched_keys: Iterable[DKT] ) -> None: - # We pop and reinsert as we need to tell the cache the size may have - # changed + """Add the given dictionary values as explicit keys in the cache. + + Args: + key: top-level cache key + value: The dictionary with all the values that we should cache + fetched_keys: The full set of dict keys that were looked up. Any keys + here not in `value` should be marked as "known absent". + """ + + for dict_key, dict_value in value.items(): + self.cache[(key, dict_key)] = _PerKeyValue(dict_value) - entry: DictionaryEntry = self.cache.pop(key, DictionaryEntry(False, set(), {})) - entry.value.update(value) - entry.known_absent.update(known_absent) - self.cache[key] = entry + for dict_key in fetched_keys: + if dict_key in value: + continue - def _insert(self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT]) -> None: - self.cache[key] = DictionaryEntry(True, known_absent, value) + self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a3b60578e3..b3bdedb04c 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -25,8 +25,10 @@ from typing import ( Collection, Dict, Generic, + Iterable, List, Optional, + Tuple, Type, TypeVar, Union, @@ -44,7 +46,11 @@ from synapse.metrics.background_process_metrics import wrap_as_background_proces from synapse.metrics.jemalloc import get_jemalloc_stats from synapse.util import Clock, caches from synapse.util.caches import CacheMetric, EvictionReason, register_cache -from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry +from synapse.util.caches.treecache import ( + TreeCache, + iterate_tree_cache_entry, + iterate_tree_cache_items, +) from synapse.util.linked_list import ListNode if TYPE_CHECKING: @@ -109,7 +115,7 @@ GLOBAL_ROOT = ListNode["_Node"].create_root_node() @wrap_as_background_process("LruCache._expire_old_entries") async def _expire_old_entries( - clock: Clock, expiry_seconds: int, autotune_config: Optional[dict] + clock: Clock, expiry_seconds: float, autotune_config: Optional[dict] ) -> None: """Walks the global cache list to find cache entries that haven't been accessed in the given number of seconds, or if a given memory threshold has been breached. @@ -537,6 +543,7 @@ class LruCache(Generic[KT, VT]): default: Literal[None] = None, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Optional[VT]: ... @@ -546,6 +553,7 @@ class LruCache(Generic[KT, VT]): default: T, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Union[T, VT]: ... @@ -555,10 +563,27 @@ class LruCache(Generic[KT, VT]): default: Optional[T] = None, callbacks: Collection[Callable[[], None]] = (), update_metrics: bool = True, + update_last_access: bool = True, ) -> Union[None, T, VT]: + """Look up a key in the cache + + Args: + key + default + callbacks: A collection of callbacks that will fire when the + node is removed from the cache (either due to invalidation + or expiry). + update_metrics: Whether to update the hit rate metrics + update_last_access: Whether to update the last access metrics + on a node if successfully fetched. These metrics are used + to determine when to remove the node from the cache. Set + to False if this fetch should *not* prevent a node from + being expired. + """ node = cache.get(key, None) if node is not None: - move_node_to_front(node) + if update_last_access: + move_node_to_front(node) node.add_callbacks(callbacks) if update_metrics and metrics: metrics.inc_hits() @@ -568,6 +593,65 @@ class LruCache(Generic[KT, VT]): metrics.inc_misses() return default + @overload + def cache_get_multi( + key: tuple, + default: Literal[None] = None, + update_metrics: bool = True, + ) -> Union[None, Iterable[Tuple[KT, VT]]]: + ... + + @overload + def cache_get_multi( + key: tuple, + default: T, + update_metrics: bool = True, + ) -> Union[T, Iterable[Tuple[KT, VT]]]: + ... + + @synchronized + def cache_get_multi( + key: tuple, + default: Optional[T] = None, + update_metrics: bool = True, + ) -> Union[None, T, Iterable[Tuple[KT, VT]]]: + """Returns a generator yielding all entries under the given key. + + Can only be used if backed by a tree cache. + + Example: + + cache = LruCache(10, cache_type=TreeCache) + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + items = cache.get_multi((1,)) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + + Returns: + Either default if the key doesn't exist, or a generator of the + key/value pairs. + """ + + assert isinstance(cache, TreeCache) + + node = cache.get(key, None) + if node is not None: + if update_metrics and metrics: + metrics.inc_hits() + + # We store entries in the `TreeCache` with values of type `_Node`, + # which we need to unwrap. + return ( + (full_key, lru_node.value) + for full_key, lru_node in iterate_tree_cache_items(key, node) + ) + else: + if update_metrics and metrics: + metrics.inc_misses() + return default + @synchronized def cache_set( key: KT, value: VT, callbacks: Collection[Callable[[], None]] = () @@ -674,6 +758,8 @@ class LruCache(Generic[KT, VT]): self.setdefault = cache_set_default self.pop = cache_pop self.del_multi = cache_del_multi + if cache_type is TreeCache: + self.get_multi = cache_get_multi # `invalidate` is exposed for consistency with DeferredCache, so that it can be # invalidated by the cache invalidation replication stream. self.invalidate = cache_del_multi @@ -730,3 +816,41 @@ class LruCache(Generic[KT, VT]): # This happens e.g. in the sync code where we have an expiring cache of # lru caches. self.clear() + + +class AsyncLruCache(Generic[KT, VT]): + """ + An asynchronous wrapper around a subset of the LruCache API. + + On its own this doesn't change the behaviour but allows subclasses that + utilize external cache systems that require await behaviour to be created. + """ + + def __init__(self, *args, **kwargs): # type: ignore + self._lru_cache: LruCache[KT, VT] = LruCache(*args, **kwargs) + + async def get( + self, key: KT, default: Optional[T] = None, update_metrics: bool = True + ) -> Optional[VT]: + return self._lru_cache.get(key, update_metrics=update_metrics) + + async def set(self, key: KT, value: VT) -> None: + self._lru_cache.set(key, value) + + async def invalidate(self, key: KT) -> None: + # This method should invalidate any external cache and then invalidate the LruCache. + return self._lru_cache.invalidate(key) + + def invalidate_local(self, key: KT) -> None: + """Remove an entry from the local cache + + This variant of `invalidate` is useful if we know that the external + cache has already been invalidated. + """ + return self._lru_cache.invalidate(key) + + async def contains(self, key: KT) -> bool: + return self._lru_cache.contains(key) + + async def clear(self) -> None: + self._lru_cache.clear() diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index e78305f787..c1b8ec0c73 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -64,6 +64,15 @@ class TreeCache: self.size += 1 def get(self, key, default=None): + """When `key` is a full key, fetches the value for the given key (if + any). + + If `key` is only a partial key (i.e. a truncated tuple) then returns a + `TreeCacheNode`, which can be passed to the `iterate_tree_cache_*` + functions to iterate over all entries in the cache with keys that start + with the given partial key. + """ + node = self.root for k in key[:-1]: node = node.get(k, None) @@ -139,3 +148,32 @@ def iterate_tree_cache_entry(d): yield from iterate_tree_cache_entry(value_d) else: yield d + + +def iterate_tree_cache_items(key, value): + """Helper function to iterate over the leaves of a tree, i.e. a dict of that + can contain dicts. + + The provided key is a tuple that will get prepended to the returned keys. + + Example: + + cache = TreeCache() + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + tree_node = cache.get((1,)) + + items = iterate_tree_cache_items((1,), tree_node) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + + Returns: + A generator yielding key/value pairs. + """ + if isinstance(value, TreeCacheNode): + for sub_key, sub_value in value.items(): + yield from iterate_tree_cache_items((*key, sub_key), sub_value) + else: + # we've reached a leaf of the tree. + yield key, value diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709f..9abbaa5a64 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -13,16 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from enum import Enum, auto from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +import attr from typing_extensions import Final from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event from synapse.storage.controllers import StorageControllers +from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.util import Clock logger = logging.getLogger(__name__) @@ -102,165 +107,410 @@ async def filter_events_for_client( ] = await storage.main.get_retention_policy_for_room(room_id) def allowed(event: EventBase) -> Optional[EventBase]: - """ - Args: - event: event to check - - Returns: - None if the user cannot see this event at all - - a redacted copy of the event if they can only see a redacted - version - - the original event if they can see it as normal. - """ - # Only run some checks if these events aren't about to be sent to clients. This is - # because, if this is not the case, we're probably only checking if the users can - # see events in the room at that point in the DAG, and that shouldn't be decided - # on those checks. - if filter_send_to_client: - if event.type == EventTypes.Dummy: - return None - - if not event.is_state() and event.sender in ignore_list: - return None - - # Until MSC2261 has landed we can't redact malicious alias events, so for - # now we temporarily filter out m.room.aliases entirely to mitigate - # abuse, while we spec a better solution to advertising aliases - # on rooms. - if event.type == EventTypes.Aliases: - return None - - # Don't try to apply the room's retention policy if the event is a state - # event, as MSC1763 states that retention is only considered for non-state - # events. - if not event.is_state(): - retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.max_lifetime - - if max_lifetime is not None: - oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime - - if event.origin_server_ts < oldest_allowed_ts: - return None - - if event.event_id in always_include_ids: - return event + return _check_client_allowed_to_see_event( + user_id=user_id, + event=event, + clock=storage.main.clock, + filter_send_to_client=filter_send_to_client, + sender_ignored=event.sender in ignore_list, + always_include_ids=always_include_ids, + retention_policy=retention_policies[room_id], + state=event_id_to_state.get(event.event_id), + is_peeking=is_peeking, + sender_erased=erased_senders.get(event.sender, False), + ) - # we need to handle outliers separately, since we don't have the room state. - if event.internal_metadata.outlier: - # Normally these can't be seen by clients, but we make an exception for - # for out-of-band membership events (eg, incoming invites, or rejections of - # said invite) for the user themselves. - if event.type == EventTypes.Member and event.state_key == user_id: - logger.debug("Returning out-of-band-membership event %s", event) - return event + # Check each event: gives an iterable of None or (a potentially modified) + # EventBase. + filtered_events = map(allowed, events) + + # Turn it into a list and remove None entries before returning. + return [ev for ev in filtered_events if ev] - return None - state = event_id_to_state[event.event_id] +async def filter_event_for_clients_with_state( + store: DataStore, + user_ids: Collection[str], + event: EventBase, + context: EventContext, + is_peeking: bool = False, + filter_send_to_client: bool = True, +) -> Collection[str]: + """ + Checks to see if an event is visible to the users in the list at the time of + the event. - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) + Note: This does *not* check if the sender of the event was erased. - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) + Args: + store: databases + user_ids: user_ids to be checked + event: the event to be checked + context: EventContext for the event to be checked + is_peeking: Whether the users are peeking into the room, ie not + currently joined + filter_send_to_client: Whether we're checking an event that's going to be + sent to a client. This might not always be the case since this function can + also be called to check whether a user can see the state at a given point. - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED + Returns: + Collection of user IDs for whom the event is visible + """ + # None of the users should see the event if it is soft_failed + if event.internal_metadata.is_soft_failed(): + return [] - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility + # Make a set for all user IDs that haven't been filtered out by a check. + allowed_user_ids = set(user_ids) - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + ignored_by = await store.ignored_by(event.sender) + retention_policy = await store.get_retention_policy_for_room(event.room_id) + + for user_id in user_ids: + if ( + _check_filter_send_to_client( + event, + store.clock, + retention_policy, + sender_ignored=user_id in ignored_by, + ) + == _CheckFilter.DENIED ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event + allowed_user_ids.discard(user_id) + + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key in allowed_user_ids: + logger.debug("Returning out-of-band-membership event %s", event) + return {event.state_key} + + return set() + + # First we get just the history visibility in case its shared/world-readable + # room. + visibility_state_map = await _get_state_map( + store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY]) + ) - # otherwise, it depends on the room visibility. + visibility = get_effective_room_visibility_from_state(visibility_state_map) + if ( + _check_history_visibility(event, visibility, is_peeking=is_peeking) + == _CheckVisibility.ALLOWED + ): + return allowed_user_ids - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None + # The history visibility isn't lax, so we now need to fetch the membership + # events of all the users. + + filter_list = [] + for user_id in allowed_user_ids: + filter_list.append((EventTypes.Member, user_id)) + filter_list.append((EventTypes.RoomHistoryVisibility, "")) + + state_filter = StateFilter.from_types(filter_list) + state_map = await _get_state_map(store, event, context, state_filter) + + # Now we check whether the membership allows each user to see the event. + return { + user_id + for user_id in allowed_user_ids + if _check_membership(user_id, event, visibility, state_map, is_peeking).allowed + } + + +async def _get_state_map( + store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter +) -> StateMap[EventBase]: + """Helper function for getting a `StateMap[EventBase]` from an `EventContext`""" + state_map = await context.get_prev_state_ids(state_filter) + + # Use events rather than event ids as content from the events are needed in + # _check_visibility + event_map = await store.get_events(state_map.values(), get_prev_content=False) + + updated_state_map = {} + for state_key, event_id in state_map.items(): + state_event = event_map.get(event_id) + if state_event: + updated_state_map[state_key] = state_event + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it + # may not have been persisted to the db yet + updated_state_map[current_state_key] = event - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. - return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. + return updated_state_map + + +def _check_client_allowed_to_see_event( + user_id: str, + event: EventBase, + clock: Clock, + filter_send_to_client: bool, + is_peeking: bool, + always_include_ids: FrozenSet[str], + sender_ignored: bool, + retention_policy: RetentionPolicy, + state: Optional[StateMap[EventBase]], + sender_erased: bool, +) -> Optional[EventBase]: + """Check with the given user is allowed to see the given event + + See `filter_events_for_client` for details about args + + Args: + user_id + event + clock + filter_send_to_client + is_peeking + always_include_ids + sender_ignored: Whether the user is ignoring the event sender + retention_policy: The retention policy of the room + state: The state at the event, unless its an outlier + sender_erased: Whether the event sender has been marked as "erased" + + Returns: + None if the user cannot see this event at all + + a redacted copy of the event if they can only see a redacted + version + + the original event if they can see it as normal. + """ + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + if ( + _check_filter_send_to_client(event, clock, retention_policy, sender_ignored) + == _CheckFilter.DENIED + ): return None - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) + if event.event_id in always_include_ids: + return event + + # we need to handle outliers separately, since we don't have the room state. + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key == user_id: + logger.debug("Returning out-of-band-membership event %s", event) + return event + + return None + + if state is None: + raise Exception("Missing state for non-outlier event") + # get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state) + + # Check if the room has lax history visibility, allowing us to skip + # membership checks. + # + # We can only do this check if the sender has *not* been erased, as if they + # have we need to check the user's membership. + if ( + not sender_erased + and _check_history_visibility(event, visibility, is_peeking) + == _CheckVisibility.ALLOWED + ): return event - # Check each event: gives an iterable of None or (a potentially modified) - # EventBase. - filtered_events = map(allowed, events) + membership_result = _check_membership(user_id, event, visibility, state, is_peeking) + if not membership_result.allowed: + return None - # Turn it into a list and remove None entries before returning. - return [ev for ev in filtered_events if ev] + # If the sender has been erased and the user was not joined at the time, we + # must only return the redacted form. + if sender_erased and not membership_result.joined: + event = prune_event(event) + + return event + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _CheckMembershipReturn: + "Return value of _check_membership" + allowed: bool + joined: bool + + +def _check_membership( + user_id: str, + event: EventBase, + visibility: str, + state: StateMap[EventBase], + is_peeking: bool, +) -> _CheckMembershipReturn: + """Check whether the user can see the event due to their membership + + Returns: + True if they can, False if they can't, plus the membership of the user + at the event. + """ + # If the event is the user's own membership event, use the 'most joined' + # membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return _CheckMembershipReturn(True, membership == Membership.JOIN) + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. + if membership is None: + membership_event = state.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return _CheckMembershipReturn(True, True) + + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return _CheckMembershipReturn(False, False) + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return _CheckMembershipReturn(membership == Membership.INVITE, False) + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return _CheckMembershipReturn(False, False) + + # The visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it. + return _CheckMembershipReturn(True, False) + + +class _CheckFilter(Enum): + MAYBE_ALLOWED = auto() + DENIED = auto() + + +def _check_filter_send_to_client( + event: EventBase, + clock: Clock, + retention_policy: RetentionPolicy, + sender_ignored: bool, +) -> _CheckFilter: + """Apply checks for sending events to client + + Returns: + True if might be allowed to be sent to clients, False if definitely not. + """ + + if event.type == EventTypes.Dummy: + return _CheckFilter.DENIED + + if not event.is_state() and sender_ignored: + return _CheckFilter.DENIED + + # Until MSC2261 has landed we can't redact malicious alias events, so for + # now we temporarily filter out m.room.aliases entirely to mitigate + # abuse, while we spec a better solution to advertising aliases + # on rooms. + if event.type == EventTypes.Aliases: + return _CheckFilter.DENIED + + # Don't try to apply the room's retention policy if the event is a state + # event, as MSC1763 states that retention is only considered for non-state + # events. + if not event.is_state(): + max_lifetime = retention_policy.max_lifetime + + if max_lifetime is not None: + oldest_allowed_ts = clock.time_msec() - max_lifetime + + if event.origin_server_ts < oldest_allowed_ts: + return _CheckFilter.DENIED + + return _CheckFilter.MAYBE_ALLOWED + + +class _CheckVisibility(Enum): + ALLOWED = auto() + MAYBE_DENIED = auto() + + +def _check_history_visibility( + event: EventBase, visibility: str, is_peeking: bool +) -> _CheckVisibility: + """Check if event is allowed to be seen due to lax history visibility. + + Returns: + True if user can definitely see the event, False if maybe not. + """ + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + if visibility == HistoryVisibility.SHARED and not is_peeking: + return _CheckVisibility.ALLOWED + elif visibility == HistoryVisibility.WORLD_READABLE: + return _CheckVisibility.ALLOWED + + return _CheckVisibility.MAYBE_DENIED def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event - Handles missing and invalid history visibility events. """ visibility_event = state.get(_HISTORY_VIS_KEY, None) diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index 18649c2c05..c86f783c5b 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -314,3 +314,77 @@ class TestRatelimiter(unittest.HomeserverTestCase): # Check that we get rate limited after using that token. self.assertFalse(consume_at(11.1)) + + def test_record_action_which_doesnt_fill_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe two actions, leaving room in the bucket for one more. + limiter.record_action(requester=None, key="a", n_actions=2, _time_now_s=0.0) + + # We should be able to take a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertTrue(success) + + # ... but not two. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + def test_record_action_which_fills_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe three actions, filling up the bucket. + limiter.record_action(requester=None, key="a", n_actions=3, _time_now_s=0.0) + + # We should be unable to take a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + # If we wait 10 seconds to leak a token, we should be able to take one action... + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertTrue(success) + + # ... but not two. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertFalse(success) + + def test_record_action_which_overfills_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe four actions, exceeding the bucket. + limiter.record_action(requester=None, key="a", n_actions=4, _time_now_s=0.0) + + # We should be prevented from taking a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + # If we wait 10 seconds to leak a token, we should be unable to take an action + # because the bucket is still full. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertFalse(success) + + # But after another 10 seconds we leak a second token, giving us room for + # action. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=20.0) + ) + self.assertTrue(success) diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py index 9f1115dd23..c6dd99316a 100644 --- a/tests/federation/test_complexity.py +++ b/tests/federation/test_complexity.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from http import HTTPStatus from unittest.mock import Mock from synapse.api.errors import Codes, SynapseError @@ -50,7 +51,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) complexity = channel.json_body["v1"] self.assertTrue(complexity > 0, complexity) @@ -62,7 +63,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) complexity = channel.json_body["v1"] self.assertEqual(complexity, 1.23) diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index 268a48d7ba..50e376f695 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -22,6 +22,7 @@ from twisted.python.failure import Failure from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import RoomVersions +from synapse.events import EventBase from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock @@ -38,42 +39,46 @@ class FederationClientTest(FederatingHomeserverTestCase): self._mock_agent = mock.create_autospec(twisted.web.client.Agent, spec_set=True) homeserver.get_federation_http_client().agent = self._mock_agent - def test_get_room_state(self): - creator = f"@creator:{self.OTHER_SERVER_NAME}" - test_room_id = "!room_id" + # Move clock up to somewhat realistic time so the PDU destination retry + # works (`now` needs to be larger than `0 + PDU_RETRY_TIME_MS`). + self.reactor.advance(1000000000) + + self.creator = f"@creator:{self.OTHER_SERVER_NAME}" + self.test_room_id = "!room_id" + def test_get_room_state(self): # mock up some events to use in the response. # In real life, these would have things in `prev_events` and `auth_events`, but that's # a bit annoying to mock up, and the code under test doesn't care, so we don't bother. - create_event_dict = self.add_hashes_and_signatures( + create_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.create", "state_key": "", - "sender": creator, - "content": {"creator": creator}, + "sender": self.creator, + "content": {"creator": self.creator}, "prev_events": [], "auth_events": [], "origin_server_ts": 500, } ) - member_event_dict = self.add_hashes_and_signatures( + member_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.member", - "sender": creator, - "state_key": creator, + "sender": self.creator, + "state_key": self.creator, "content": {"membership": "join"}, "prev_events": [], "auth_events": [], "origin_server_ts": 600, } ) - pl_event_dict = self.add_hashes_and_signatures( + pl_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.power_levels", - "sender": creator, + "sender": self.creator, "state_key": "", "content": {}, "prev_events": [], @@ -102,8 +107,8 @@ class FederationClientTest(FederatingHomeserverTestCase): # now fire off the request state_resp, auth_resp = self.get_success( self.hs.get_federation_client().get_room_state( - "yet_another_server", - test_room_id, + "yet.another.server", + self.test_room_id, "event_id", RoomVersions.V9, ) @@ -112,7 +117,7 @@ class FederationClientTest(FederatingHomeserverTestCase): # check the right call got made to the agent self._mock_agent.request.assert_called_once_with( b"GET", - b"matrix://yet_another_server/_matrix/federation/v1/state/%21room_id?event_id=event_id", + b"matrix://yet.another.server/_matrix/federation/v1/state/%21room_id?event_id=event_id", headers=mock.ANY, bodyProducer=None, ) @@ -130,6 +135,102 @@ class FederationClientTest(FederatingHomeserverTestCase): ["m.room.create", "m.room.member", "m.room.power_levels"], ) + def test_get_pdu_returns_nothing_when_event_does_not_exist(self): + """No event should be returned when the event does not exist""" + remote_pdu = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + "event_should_not_exist", + RoomVersions.V9, + ) + ) + self.assertEqual(remote_pdu, None) + + def test_get_pdu(self): + """Test to make sure an event is returned by `get_pdu()`""" + self._get_pdu_once() + + def test_get_pdu_event_from_cache_is_pristine(self): + """Test that modifications made to events returned by `get_pdu()` + do not propagate back to to the internal cache (events returned should + be a copy). + """ + + # Get the PDU in the cache + remote_pdu = self._get_pdu_once() + + # Modify the the event reference. + # This change should not make it back to the `_get_pdu_cache`. + remote_pdu.internal_metadata.outlier = True + + # Get the event again. This time it should read it from cache. + remote_pdu2 = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + remote_pdu.event_id, + RoomVersions.V9, + ) + ) + + # Sanity check that we are working against the same event + self.assertEqual(remote_pdu.event_id, remote_pdu2.event_id) + + # Make sure the event does not include modification from earlier + self.assertIsNotNone(remote_pdu2) + self.assertEqual(remote_pdu2.internal_metadata.outlier, False) + + def _get_pdu_once(self) -> EventBase: + """Retrieve an event via `get_pdu()` and assert that an event was returned. + Also used to prime the cache for subsequent test logic. + """ + message_event_dict = self.add_hashes_and_signatures_from_other_server( + { + "room_id": self.test_room_id, + "type": "m.room.message", + "sender": self.creator, + "state_key": "", + "content": {}, + "prev_events": [], + "auth_events": [], + "origin_server_ts": 700, + "depth": 10, + } + ) + + # mock up the response, and have the agent return it + self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed( + _mock_response( + { + "origin": "yet.another.server", + "origin_server_ts": 900, + "pdus": [ + message_event_dict, + ], + } + ) + ) + + remote_pdu = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + "event_id", + RoomVersions.V9, + ) + ) + + # check the right call got made to the agent + self._mock_agent.request.assert_called_once_with( + b"GET", + b"matrix://yet.another.server/_matrix/federation/v1/event/event_id", + headers=mock.ANY, + bodyProducer=None, + ) + + self.assertIsNotNone(remote_pdu) + self.assertEqual(remote_pdu.internal_metadata.outlier, False) + + return remote_pdu + def _mock_response(resp: JsonDict): body = json.dumps(resp).encode("utf-8") diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 413b3c9426..3a6ef221ae 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from http import HTTPStatus from parameterized import parameterized @@ -20,7 +21,6 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.config.server import DEFAULT_ROOM_VERSION -from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events import make_event_from_dict from synapse.federation.federation_server import server_matches_acl_event from synapse.rest import admin @@ -59,7 +59,7 @@ class FederationServerTests(unittest.FederatingHomeserverTestCase): "/_matrix/federation/v1/get_missing_events/%s" % (room_1,), query_content, ) - self.assertEqual(400, channel.code, channel.result) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, channel.result) self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON") @@ -120,7 +120,7 @@ class StateQueryTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/v1/state/%s?event_id=xyz" % (room_1,) ) - self.assertEqual(403, channel.code, channel.result) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") @@ -148,13 +148,13 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): tok2 = self.login("fozzie", "bear") self.helper.join(self._room_id, second_member_user_id, tok=tok2) - def _make_join(self, user_id) -> JsonDict: + def _make_join(self, user_id: str) -> JsonDict: channel = self.make_signed_federation_request( "GET", f"/_matrix/federation/v1/make_join/{self._room_id}/{user_id}" f"?ver={DEFAULT_ROOM_VERSION}", ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) return channel.json_body def test_send_join(self): @@ -163,18 +163,16 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): join_result = self._make_join(joining_user) join_event_dict = join_result["event"] - add_hashes_and_signatures( - KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + self.add_hashes_and_signatures_from_other_server( join_event_dict, - signature_name=self.OTHER_SERVER_NAME, - signing_key=self.OTHER_SERVER_SIGNATURE_KEY, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], ) channel = self.make_signed_federation_request( "PUT", f"/_matrix/federation/v2/send_join/{self._room_id}/x", content=join_event_dict, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # we should get complete room state back returned_state = [ @@ -220,18 +218,16 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): join_result = self._make_join(joining_user) join_event_dict = join_result["event"] - add_hashes_and_signatures( - KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + self.add_hashes_and_signatures_from_other_server( join_event_dict, - signature_name=self.OTHER_SERVER_NAME, - signing_key=self.OTHER_SERVER_SIGNATURE_KEY, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], ) channel = self.make_signed_federation_request( "PUT", f"/_matrix/federation/v2/send_join/{self._room_id}/x?org.matrix.msc3706.partial_state=true", content=join_event_dict, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # expect a reduced room state returned_state = [ @@ -264,6 +260,67 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): ) self.assertEqual(r[("m.room.member", joining_user)].membership, "join") + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 3}}) + def test_make_join_respects_room_join_rate_limit(self) -> None: + # In the test setup, two users join the room. Since the rate limiter burst + # count is 3, a new make_join request to the room should be accepted. + + joining_user = "@ronniecorbett:" + self.OTHER_SERVER_NAME + self._make_join(joining_user) + + # Now have a new local user join the room. This saturates the rate limiter + # bucket, so the next make_join should be denied. + new_local_user = self.register_user("animal", "animal") + token = self.login("animal", "animal") + self.helper.join(self._room_id, new_local_user, tok=token) + + joining_user = "@ronniebarker:" + self.OTHER_SERVER_NAME + channel = self.make_signed_federation_request( + "GET", + f"/_matrix/federation/v1/make_join/{self._room_id}/{joining_user}" + f"?ver={DEFAULT_ROOM_VERSION}", + ) + self.assertEqual(channel.code, HTTPStatus.TOO_MANY_REQUESTS, channel.json_body) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 3}}) + def test_send_join_contributes_to_room_join_rate_limit_and_is_limited(self) -> None: + # Make two make_join requests up front. (These are rate limited, but do not + # contribute to the rate limit.) + join_event_dicts = [] + for i in range(2): + joining_user = f"@misspiggy{i}:{self.OTHER_SERVER_NAME}" + join_result = self._make_join(joining_user) + join_event_dict = join_result["event"] + self.add_hashes_and_signatures_from_other_server( + join_event_dict, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + ) + join_event_dicts.append(join_event_dict) + + # In the test setup, two users join the room. Since the rate limiter burst + # count is 3, the first send_join should be accepted... + channel = self.make_signed_federation_request( + "PUT", + f"/_matrix/federation/v2/send_join/{self._room_id}/join0", + content=join_event_dicts[0], + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # ... but the second should be denied. + channel = self.make_signed_federation_request( + "PUT", + f"/_matrix/federation/v2/send_join/{self._room_id}/join1", + content=join_event_dicts[1], + ) + self.assertEqual(channel.code, HTTPStatus.TOO_MANY_REQUESTS, channel.json_body) + + # NB: we could write a test which checks that the send_join event is seen + # by other workers over replication, and that they update their rate limit + # buckets accordingly. I'm going to assume that the join event gets sent over + # replication, at which point the tests.handlers.room_member test + # test_local_users_joining_on_another_worker_contribute_to_rate_limit + # is probably sufficient to reassure that the bucket is updated. + def _create_acl_event(content): return make_event_from_dict( diff --git a/tests/federation/transport/test_knocking.py b/tests/federation/transport/test_knocking.py index d21c11b716..0d048207b7 100644 --- a/tests/federation/transport/test_knocking.py +++ b/tests/federation/transport/test_knocking.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections import OrderedDict +from http import HTTPStatus from typing import Dict, List from synapse.api.constants import EventTypes, JoinRules, Membership @@ -255,7 +256,7 @@ class FederationKnockingTestCase( RoomVersions.V7.identifier, ), ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Note: We don't expect the knock membership event to be sent over federation as # part of the stripped room state, as the knocking homeserver already has that @@ -293,7 +294,7 @@ class FederationKnockingTestCase( % (room_id, signed_knock_event.event_id), signed_knock_event_json, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Check that we got the stripped room state in return room_state_events = channel.json_body["knock_state_events"] diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index d96d5aa138..b17af2725b 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -50,7 +50,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): self.mock_scheduler = Mock() hs = Mock() hs.get_datastores.return_value = Mock(main=self.mock_store) - self.mock_store.get_received_ts.return_value = make_awaitable(0) + self.mock_store.get_appservice_last_pos.return_value = make_awaitable(None) self.mock_store.set_appservice_last_pos.return_value = make_awaitable(None) self.mock_store.set_appservice_stream_type_pos.return_value = make_awaitable( None @@ -76,9 +76,9 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock( sender="@someone:anywhere", type="m.room.message", room_id="!foo:bar" ) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [])), - make_awaitable((1, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [], {})), + make_awaitable((1, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 1)) @@ -95,8 +95,8 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar") self.mock_as_api.query_user.return_value = make_awaitable(True) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 0)) @@ -112,8 +112,8 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar") self.mock_as_api.query_user.return_value = make_awaitable(True) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 0)) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 9afba7b0e8..8a0bb91f40 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import List, cast +from typing import cast from unittest import TestCase from twisted.test.proto_helpers import MemoryReactor @@ -50,8 +50,6 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): hs = self.setup_test_homeserver(federation_http_client=None) self.handler = hs.get_federation_handler() self.store = hs.get_datastores().main - self.state_storage_controller = hs.get_storage_controllers().state - self._event_auth_handler = hs.get_event_auth_handler() return hs def test_exchange_revoked_invite(self) -> None: @@ -225,9 +223,10 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): # we need a user on the remote server to be a member, so that we can send # extremity-causing events. + remote_server_user_id = f"@user:{self.OTHER_SERVER_NAME}" self.get_success( event_injection.inject_member_event( - self.hs, room_id, f"@user:{self.OTHER_SERVER_NAME}", "join" + self.hs, room_id, remote_server_user_id, "join" ) ) @@ -247,9 +246,15 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): # create more than is 5 which corresponds to the number of backward # extremities we slice off in `_maybe_backfill_inner` federation_event_handler = self.hs.get_federation_event_handler() + auth_events = [ + ev + for ev in current_state + if (ev.type, ev.state_key) + in {("m.room.create", ""), ("m.room.member", remote_server_user_id)} + ] for _ in range(0, 8): event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "origin_server_ts": 1, "type": "m.room.message", @@ -258,15 +263,14 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): "body": "message connected to fake event", }, "room_id": room_id, - "sender": f"@user:{self.OTHER_SERVER_NAME}", + "sender": remote_server_user_id, "prev_events": [ ev1.event_id, # We're creating an backward extremity each time thanks # to this fake event generate_fake_event_id(), ], - # lazy: *everything* is an auth event - "auth_events": [ev.event_id for ev in current_state], + "auth_events": [ev.event_id for ev in auth_events], "depth": ev1.depth + 1, }, room_version, @@ -308,142 +312,6 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): ) self.get_success(d) - def test_backfill_floating_outlier_membership_auth(self) -> None: - """ - As the local homeserver, check that we can properly process a federated - event from the OTHER_SERVER with auth_events that include a floating - membership event from the OTHER_SERVER. - - Regression test, see #10439. - """ - OTHER_SERVER = "otherserver" - OTHER_USER = "@otheruser:" + OTHER_SERVER - - # create the room - user_id = self.register_user("kermit", "test") - tok = self.login("kermit", "test") - room_id = self.helper.create_room_as( - room_creator=user_id, - is_public=True, - tok=tok, - extra_content={ - "preset": "public_chat", - }, - ) - room_version = self.get_success(self.store.get_room_version(room_id)) - - prev_event_ids = self.get_success(self.store.get_prev_events_for_room(room_id)) - ( - most_recent_prev_event_id, - most_recent_prev_event_depth, - ) = self.get_success(self.store.get_max_depth_of(prev_event_ids)) - # mapping from (type, state_key) -> state_event_id - assert most_recent_prev_event_id is not None - prev_state_map = self.get_success( - self.state_storage_controller.get_state_ids_for_event( - most_recent_prev_event_id - ) - ) - # List of state event ID's - prev_state_ids = list(prev_state_map.values()) - auth_event_ids = prev_state_ids - auth_events = list( - self.get_success(self.store.get_events(auth_event_ids)).values() - ) - - # build a floating outlier member state event - fake_prev_event_id = "$" + random_string(43) - member_event_dict = { - "type": EventTypes.Member, - "content": { - "membership": "join", - }, - "state_key": OTHER_USER, - "room_id": room_id, - "sender": OTHER_USER, - "depth": most_recent_prev_event_depth, - "prev_events": [fake_prev_event_id], - "origin_server_ts": self.clock.time_msec(), - "signatures": {OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}}, - } - builder = self.hs.get_event_builder_factory().for_room_version( - room_version, member_event_dict - ) - member_event = self.get_success( - builder.build( - prev_event_ids=member_event_dict["prev_events"], - auth_event_ids=self._event_auth_handler.compute_auth_events( - builder, - prev_state_map, - for_verification=False, - ), - depth=member_event_dict["depth"], - ) - ) - # Override the signature added from "test" homeserver that we created the event with - member_event.signatures = member_event_dict["signatures"] - - # Add the new member_event to the StateMap - updated_state_map = dict(prev_state_map) - updated_state_map[ - (member_event.type, member_event.state_key) - ] = member_event.event_id - auth_events.append(member_event) - - # build and send an event authed based on the member event - message_event_dict = { - "type": EventTypes.Message, - "content": {}, - "room_id": room_id, - "sender": OTHER_USER, - "depth": most_recent_prev_event_depth, - "prev_events": prev_event_ids.copy(), - "origin_server_ts": self.clock.time_msec(), - "signatures": {OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}}, - } - builder = self.hs.get_event_builder_factory().for_room_version( - room_version, message_event_dict - ) - message_event = self.get_success( - builder.build( - prev_event_ids=message_event_dict["prev_events"], - auth_event_ids=self._event_auth_handler.compute_auth_events( - builder, - updated_state_map, - for_verification=False, - ), - depth=message_event_dict["depth"], - ) - ) - # Override the signature added from "test" homeserver that we created the event with - message_event.signatures = message_event_dict["signatures"] - - # Stub the /event_auth response from the OTHER_SERVER - async def get_event_auth( - destination: str, room_id: str, event_id: str - ) -> List[EventBase]: - return [ - event_from_pdu_json(ae.get_pdu_json(), room_version=room_version) - for ae in auth_events - ] - - self.handler.federation_client.get_event_auth = get_event_auth # type: ignore[assignment] - - with LoggingContext("receive_pdu"): - # Fake the OTHER_SERVER federating the message event over to our local homeserver - d = run_in_background( - self.hs.get_federation_event_handler().on_receive_pdu, - OTHER_SERVER, - message_event, - ) - self.get_success(d) - - # Now try and get the events on our local homeserver - stored_event = self.get_success( - self.store.get_event(message_event.event_id, allow_none=True) - ) - self.assertTrue(stored_event is not None) - @unittest.override_config( {"rc_invites": {"per_user": {"per_second": 0.5, "burst_count": 3}}} ) diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 1a36c25c41..51c8dd6498 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -98,14 +98,13 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): auth_event_ids = [ initial_state_map[("m.room.create", "")], initial_state_map[("m.room.power_levels", "")], - initial_state_map[("m.room.join_rules", "")], member_event.event_id, ] # mock up a load of state events which we are missing state_events = [ make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_state_type", "state_key": f"state_{i}", @@ -132,7 +131,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): # Depending on the test, we either persist this upfront (as an outlier), # or let the server request it. prev_event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_regular_type", "room_id": room_id, @@ -166,7 +165,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): # mock up a regular event to pass into _process_pulled_event pulled_event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_regular_type", "room_id": room_id, diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py index 82b3bb3b73..4c62449c89 100644 --- a/tests/handlers/test_password_providers.py +++ b/tests/handlers/test_password_providers.py @@ -14,6 +14,7 @@ """Tests for the password_auth_provider interface""" +from http import HTTPStatus from typing import Any, Type, Union from unittest.mock import Mock @@ -188,14 +189,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # check_password must return an awaitable mock_password_provider.check_password.return_value = make_awaitable(True) channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@u:test", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with("@u:test", "p") mock_password_provider.reset_mock() # login with mxid should work too channel = self._send_password_login("@u:bz", "p") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@u:bz", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with("@u:bz", "p") mock_password_provider.reset_mock() @@ -204,7 +205,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # in these cases, but at least we can guard against the API changing # unexpectedly channel = self._send_password_login(" USER🙂NAME ", " pASS\U0001F622word ") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@ USER🙂NAME :test", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with( "@ USER🙂NAME :test", " pASS😢word " @@ -258,10 +259,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # check_password must return an awaitable mock_password_provider.check_password.return_value = make_awaitable(False) channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 403, channel.result) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@localuser:test", channel.json_body["user_id"]) @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) @@ -382,7 +383,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_password.assert_not_called() @override_config(legacy_providers_config(LegacyCustomAuthProvider)) @@ -406,14 +407,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login with missing param should be rejected channel = self._send_login("test.login_type", "u") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() mock_password_provider.check_auth.return_value = make_awaitable( ("@user:bz", None) ) channel = self._send_login("test.login_type", "u", test_field="y") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@user:bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( "u", "test.login_type", {"test_field": "y"} @@ -427,7 +428,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@ MALFORMED! :bz", None) ) channel = self._send_login("test.login_type", " USER🙂NAME ", test_field=" abc ") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@ MALFORMED! :bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( " USER🙂NAME ", "test.login_type", {"test_field": " abc "} @@ -510,7 +511,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@user:bz", callback) ) channel = self._send_login("test.login_type", "u", test_field="y") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@user:bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( "u", "test.login_type", {"test_field": "y"} @@ -549,7 +550,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() @override_config( @@ -584,7 +585,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() @override_config( @@ -615,7 +616,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() mock_password_provider.check_password.assert_not_called() @@ -646,13 +647,13 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@localuser:test", None) ) channel = self._send_login("test.login_type", "localuser", test_field="") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) tok1 = channel.json_body["access_token"] channel = self._send_login( "test.login_type", "localuser", test_field="", device_id="dev2" ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # make the initial request which returns a 401 channel = self._delete_device(tok1, "dev2") @@ -721,7 +722,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # password login shouldn't work and should be rejected with a 400 # ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) def test_on_logged_out(self): """Tests that the on_logged_out callback is called when the user logs out.""" @@ -884,7 +885,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): }, access_token=tok, ) - self.assertEqual(channel.code, 403, channel.result) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) self.assertEqual( channel.json_body["errcode"], Codes.THREEPID_DENIED, @@ -906,7 +907,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): }, access_token=tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertIn("sid", channel.json_body) m.assert_called_once_with("email", "bar@test.com", registration) @@ -949,12 +950,12 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): "register", {"auth": {"session": session, "type": LoginType.DUMMY}}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) return channel.json_body def _get_login_flows(self) -> JsonDict: channel = self.make_request("GET", "/_matrix/client/r0/login") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) return channel.json_body["flows"] def _send_password_login(self, user: str, password: str) -> FakeChannel: diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py new file mode 100644 index 0000000000..254e7e4b80 --- /dev/null +++ b/tests/handlers/test_room_member.py @@ -0,0 +1,290 @@ +from http import HTTPStatus +from unittest.mock import Mock, patch + +from twisted.test.proto_helpers import MemoryReactor + +import synapse.rest.admin +import synapse.rest.client.login +import synapse.rest.client.room +from synapse.api.constants import EventTypes, Membership +from synapse.api.errors import LimitExceededError +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult +from synapse.server import HomeServer +from synapse.types import UserID, create_requester +from synapse.util import Clock + +from tests.replication._base import RedisMultiWorkerStreamTestCase +from tests.server import make_request +from tests.test_utils import make_awaitable +from tests.unittest import FederatingHomeserverTestCase, override_config + + +class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.client.login.register_servlets, + synapse.rest.client.room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.handler = hs.get_room_member_handler() + + # Create three users. + self.alice = self.register_user("alice", "pass") + self.alice_token = self.login("alice", "pass") + self.bob = self.register_user("bob", "pass") + self.bob_token = self.login("bob", "pass") + self.chris = self.register_user("chris", "pass") + self.chris_token = self.login("chris", "pass") + + # Create a room on this homeserver. Note that this counts as a join: it + # contributes to the rate limter's count of actions + self.room_id = self.helper.create_room_as(self.alice, tok=self.alice_token) + + self.intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_user_local_joins_contribute_to_limit_and_are_limited(self) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. + # Try joining the room as Bob. + self.get_success( + self.handler.update_membership( + requester=create_requester(self.bob), + target=UserID.from_string(self.bob), + room_id=self.room_id, + action=Membership.JOIN, + ) + ) + + # The rate limiter bucket is full. A second join should be denied. + self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_user_profile_edits_dont_contribute_to_limit(self) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. Alice should still be able to change her displayname. + self.get_success( + self.handler.update_membership( + requester=create_requester(self.alice), + target=UserID.from_string(self.alice), + room_id=self.room_id, + action=Membership.JOIN, + content={"displayname": "Alice Cooper"}, + ) + ) + + # Still room in the limiter bucket. Chris's join should be accepted. + self.get_success( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ) + ) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 1}}) + def test_remote_joins_contribute_to_rate_limit(self) -> None: + # Join once, to fill the rate limiter bucket. + # + # To do this we have to mock the responses from the remote homeserver. + # We also patch out a bunch of event checks on our end. All we're really + # trying to check here is that remote joins will bump the rate limter when + # they are persisted. + create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": self.intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [create_event.event_id], + "sender": self.bob, + "state_key": self.bob, + "room_id": self.intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + mock_make_membership_event = Mock( + return_value=make_awaitable( + ( + self.OTHER_SERVER_NAME, + join_event, + self.hs.config.server.default_room_version, + ) + ) + ) + mock_send_join = Mock( + return_value=make_awaitable( + SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event], + auth_chain=[create_event], + partial_state=False, + servers_in_room=[], + ) + ) + ) + + with patch.object( + self.handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.handler.update_membership( + requester=create_requester(self.bob), + target=UserID.from_string(self.bob), + room_id=self.intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) + + # Try to join as Chris. Should get denied. + self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ), + LimitExceededError, + ) + + # TODO: test that remote joins to a room are rate limited. + # Could do this by setting the burst count to 1, then: + # - remote-joining a room + # - immediately leaving + # - trying to remote-join again. + + +class TestReplicatedJoinsLimitedByPerRoomRateLimiter(RedisMultiWorkerStreamTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.client.login.register_servlets, + synapse.rest.client.room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.handler = hs.get_room_member_handler() + + # Create three users. + self.alice = self.register_user("alice", "pass") + self.alice_token = self.login("alice", "pass") + self.bob = self.register_user("bob", "pass") + self.bob_token = self.login("bob", "pass") + self.chris = self.register_user("chris", "pass") + self.chris_token = self.login("chris", "pass") + + # Create a room on this homeserver. + # Note that this counts as a + self.room_id = self.helper.create_room_as(self.alice, tok=self.alice_token) + self.intially_unjoined_room_id = "!example:otherhs" + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_users_joining_on_another_worker_contribute_to_rate_limit( + self, + ) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. + self.replicate() + + # Spawn another worker and have bob join via it. + worker_app = self.make_worker_hs( + "synapse.app.generic_worker", extra_config={"worker_name": "other worker"} + ) + worker_site = self._hs_to_site[worker_app] + channel = make_request( + self.reactor, + worker_site, + "POST", + f"/_matrix/client/v3/rooms/{self.room_id}/join", + access_token=self.bob_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) + + # wait for join to arrive over replication + self.replicate() + + # Try to join as Chris on the worker. Should get denied because Alice + # and Bob have both joined the room. + self.get_failure( + worker_app.get_room_member_handler().update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) + + # Try to join as Chris on the original worker. Should get denied because Alice + # and Bob have both joined the room. + self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index ecc7cc6461..e3f38fbcc5 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -159,7 +159,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): # Blow away caches (supported room versions can only change due to a restart). self.store.get_rooms_for_user_with_stream_ordering.invalidate_all() - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) self.store._event_ref.clear() # The rooms should be excluded from the sync response. diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index e430941d27..3b14c76d7e 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import cast + from twisted.internet import defer from twisted.test.proto_helpers import MemoryReactorClock @@ -40,6 +42,15 @@ from tests.unittest import TestCase class LogContextScopeManagerTestCase(TestCase): + """ + Test logging contexts and active opentracing spans. + + There's casts throughout this from generic opentracing objects (e.g. + opentracing.Span) to the ones specific to Jaeger since they have additional + properties that these tests depend on. This is safe since the only supported + opentracing backend is Jaeger. + """ + if LogContextScopeManager is None: skip = "Requires opentracing" # type: ignore[unreachable] if jaeger_client is None: @@ -50,7 +61,7 @@ class LogContextScopeManagerTestCase(TestCase): # global variables that power opentracing. We create our own tracer instance # and test with it. - scope_manager = LogContextScopeManager({}) + scope_manager = LogContextScopeManager() config = jaeger_client.config.Config( config={}, service_name="test", scope_manager=scope_manager ) @@ -69,7 +80,7 @@ class LogContextScopeManagerTestCase(TestCase): # start_active_span should start and activate a span. scope = start_active_span("span", tracer=self._tracer) - span = scope.span + span = cast(jaeger_client.Span, scope.span) self.assertEqual(self._tracer.active_span, span) self.assertIsNotNone(span.start_time) @@ -91,6 +102,7 @@ class LogContextScopeManagerTestCase(TestCase): with LoggingContext("root context"): with start_active_span("root span", tracer=self._tracer) as root_scope: self.assertEqual(self._tracer.active_span, root_scope.span) + root_context = cast(jaeger_client.SpanContext, root_scope.span.context) scope1 = start_active_span( "child1", @@ -99,9 +111,8 @@ class LogContextScopeManagerTestCase(TestCase): self.assertEqual( self._tracer.active_span, scope1.span, "child1 was not activated" ) - self.assertEqual( - scope1.span.context.parent_id, root_scope.span.context.span_id - ) + context1 = cast(jaeger_client.SpanContext, scope1.span.context) + self.assertEqual(context1.parent_id, root_context.span_id) scope2 = start_active_span_follows_from( "child2", @@ -109,17 +120,18 @@ class LogContextScopeManagerTestCase(TestCase): tracer=self._tracer, ) self.assertEqual(self._tracer.active_span, scope2.span) - self.assertEqual( - scope2.span.context.parent_id, scope1.span.context.span_id - ) + context2 = cast(jaeger_client.SpanContext, scope2.span.context) + self.assertEqual(context2.parent_id, context1.span_id) with scope1, scope2: pass # the root scope should be restored self.assertEqual(self._tracer.active_span, root_scope.span) - self.assertIsNotNone(scope2.span.end_time) - self.assertIsNotNone(scope1.span.end_time) + span2 = cast(jaeger_client.Span, scope2.span) + span1 = cast(jaeger_client.Span, scope1.span) + self.assertIsNotNone(span2.end_time) + self.assertIsNotNone(span1.end_time) self.assertIsNone(self._tracer.active_span) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 9b623d0033..718f489577 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -16,13 +16,23 @@ from typing import Dict, Optional, Set, Tuple, Union import frozendict +from twisted.test.proto_helpers import MemoryReactor + +import synapse.rest.admin +from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions +from synapse.appservice import ApplicationService from synapse.events import FrozenEvent from synapse.push import push_rule_evaluator from synapse.push.push_rule_evaluator import PushRuleEvaluatorForEvent +from synapse.rest.client import login, register, room +from synapse.server import HomeServer +from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.types import JsonDict +from synapse.util import Clock from tests import unittest +from tests.test_utils.event_injection import create_event, inject_member_event class PushRuleEvaluatorTestCase(unittest.TestCase): @@ -354,3 +364,78 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "event_type": "*.reaction", } self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + +class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): + """Tests for the bulk push rule evaluator""" + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + register.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): + # Define an application service so that we can register appservice users + self._service_token = "some_token" + self._service = ApplicationService( + self._service_token, + "as1", + "@as.sender:test", + namespaces={ + "users": [ + {"regex": "@_as_.*:test", "exclusive": True}, + {"regex": "@as.sender:test", "exclusive": True}, + ] + }, + msc3202_transaction_extensions=True, + ) + self.hs.get_datastores().main.services_cache = [self._service] + self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex( + [self._service] + ) + + self._as_user, _ = self.register_appservice_user( + "_as_user", self._service_token + ) + + self.evaluator = self.hs.get_bulk_push_rule_evaluator() + + def test_ignore_appservice_users(self) -> None: + "Test that we don't generate push for appservice users" + + user_id = self.register_user("user", "pass") + token = self.login("user", "pass") + + room_id = self.helper.create_room_as(user_id, tok=token) + self.get_success( + inject_member_event(self.hs, room_id, self._as_user, Membership.JOIN) + ) + + event, context = self.get_success( + create_event( + self.hs, + type=EventTypes.Message, + room_id=room_id, + sender=user_id, + content={"body": "test", "msgtype": "m.text"}, + ) + ) + + # Assert the returned push rules do not contain the app service user + rules = self.get_success(self.evaluator._get_rules_for_event(event)) + self.assertTrue(self._as_user not in rules) + + # Assert that no push actions have been added to the staging table (the + # sender should not be pushed for the event) + users_with_push_actions = self.get_success( + self.hs.get_datastores().main.db_pool.simple_select_onecol( + table="event_push_actions_staging", + keyvalues={"event_id": event.event_id}, + retcol="user_id", + desc="test_ignore_appservice_users", + ) + ) + + self.assertEqual(len(users_with_push_actions), 0) diff --git a/tests/replication/slave/storage/test_account_data.py b/tests/replication/slave/storage/test_account_data.py deleted file mode 100644 index 1524087c43..0000000000 --- a/tests/replication/slave/storage/test_account_data.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore - -from ._base import BaseSlavedStoreTestCase - -USER_ID = "@feeling:blue" -TYPE = "my.type" - - -class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase): - - STORE_TYPE = SlavedAccountDataStore - - def test_user_account_data(self): - self.get_success( - self.master_store.add_account_data_for_user(USER_ID, TYPE, {"a": 1}) - ) - self.replicate() - self.check( - "get_global_account_data_by_type_for_user", [USER_ID, TYPE], {"a": 1} - ) - - self.get_success( - self.master_store.add_account_data_for_user(USER_ID, TYPE, {"a": 2}) - ) - self.replicate() - self.check( - "get_global_account_data_by_type_for_user", [USER_ID, TYPE], {"a": 2} - ) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index ca6af9417b..2526136ff8 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -21,7 +21,7 @@ from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, RoomTypes from synapse.api.errors import Codes from synapse.handlers.pagination import PaginationHandler from synapse.rest.client import directory, events, login, room @@ -1130,6 +1130,8 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("guest_access", r) self.assertIn("history_visibility", r) self.assertIn("state_events", r) + self.assertIn("room_type", r) + self.assertIsNone(r["room_type"]) # Check that the correct number of total rooms was returned self.assertEqual(channel.json_body["total_rooms"], total_rooms) @@ -1229,7 +1231,11 @@ class RoomTestCase(unittest.HomeserverTestCase): def test_correct_room_attributes(self) -> None: """Test the correct attributes for a room are returned""" # Create a test room - room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + room_id = self.helper.create_room_as( + self.admin_user, + tok=self.admin_user_tok, + extra_content={"creation_content": {"type": RoomTypes.SPACE}}, + ) test_alias = "#test:test" test_room_name = "something" @@ -1306,6 +1312,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertEqual(room_id, r["room_id"]) self.assertEqual(test_room_name, r["name"]) self.assertEqual(test_alias, r["canonical_alias"]) + self.assertEqual(RoomTypes.SPACE, r["room_type"]) def test_room_list_sort_order(self) -> None: """Test room list sort ordering. alphabetical name versus number of members, @@ -1579,8 +1586,8 @@ class RoomTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - self.assertEqual(room_id, channel.json_body.get("rooms")[0].get("room_id")) - self.assertEqual("ж", channel.json_body.get("rooms")[0].get("name")) + self.assertEqual(room_id, channel.json_body["rooms"][0].get("room_id")) + self.assertEqual("ж", channel.json_body["rooms"][0].get("name")) def test_single_room(self) -> None: """Test that a single room can be requested correctly""" @@ -1630,7 +1637,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("guest_access", channel.json_body) self.assertIn("history_visibility", channel.json_body) self.assertIn("state_events", channel.json_body) - + self.assertIn("room_type", channel.json_body) self.assertEqual(room_id_1, channel.json_body["room_id"]) def test_single_room_devices(self) -> None: diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 0d44102237..12db68d564 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1379,7 +1379,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("Bob's name", channel.json_body["displayname"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) @@ -1434,7 +1434,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("Bob's name", channel.json_body["displayname"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) @@ -1488,7 +1488,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): if channel.code != HTTPStatus.OK: raise HttpResponseException( - channel.code, channel.result["reason"], channel.json_body + channel.code, channel.result["reason"], channel.result["body"] ) # Set monthly active users to the limit @@ -1512,7 +1512,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content={"password": "abc123", "admin": False}, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertFalse(channel.json_body["admin"]) @@ -1550,7 +1550,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) # Admin user is not blocked by mau anymore - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertFalse(channel.json_body["admin"]) @@ -1585,7 +1585,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"]) @@ -1626,7 +1626,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"]) @@ -1636,6 +1636,41 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) self.assertEqual(len(pushers), 0) + @override_config( + { + "email": { + "enable_notifs": True, + "notif_for_new_users": True, + "notif_from": "test@example.com", + }, + "public_baseurl": "https://example.com", + } + ) + def test_create_user_email_notif_for_new_users_with_msisdn_threepid(self) -> None: + """ + Check that a new regular user is created successfully when they have a msisdn + threepid and email notif_for_new_users is set to True. + """ + url = self.url_prefix % "@bob:test" + + # Create user + body = { + "password": "abc123", + "threepids": [{"medium": "msisdn", "address": "1234567890"}], + } + + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content=body, + ) + + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual("msisdn", channel.json_body["threepids"][0]["medium"]) + self.assertEqual("1234567890", channel.json_body["threepids"][0]["address"]) + def test_set_password(self) -> None: """ Test setting a new password for another user. @@ -2372,7 +2407,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content={"password": "abc123"}, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("bob", channel.json_body["displayname"]) diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index a43a137273..7ae926dc9c 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import os import re from email.parser import Parser +from http import HTTPStatus from typing import Any, Dict, List, Optional, Union from unittest.mock import Mock @@ -95,10 +95,8 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): """ body = {"type": "m.login.password", "user": username, "password": password} - channel = self.make_request( - "POST", "/_matrix/client/r0/login", json.dumps(body).encode("utf8") - ) - self.assertEqual(channel.code, 403, channel.result) + channel = self.make_request("POST", "/_matrix/client/r0/login", body) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) def test_basic_password_reset(self) -> None: """Test basic password reset flow""" @@ -347,7 +345,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): shorthand=False, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Now POST to the same endpoint, mimicking the same behaviour as clicking the # password reset confirm button @@ -362,7 +360,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): shorthand=False, content_is_form=True, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) def _get_link_from_email(self) -> str: assert self.email_attempts, "No emails have been sent" @@ -390,7 +388,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): new_password: str, session_id: str, client_secret: str, - expected_code: int = 200, + expected_code: int = HTTPStatus.OK, ) -> None: channel = self.make_request( "POST", @@ -479,16 +477,14 @@ class DeactivateTestCase(unittest.HomeserverTestCase): self.assertEqual(memberships[0].room_id, room_id, memberships) def deactivate(self, user_id: str, tok: str) -> None: - request_data = json.dumps( - { - "auth": { - "type": "m.login.password", - "user": user_id, - "password": "test", - }, - "erase": False, - } - ) + request_data = { + "auth": { + "type": "m.login.password", + "user": user_id, + "password": "test", + }, + "erase": False, + } channel = self.make_request( "POST", "account/deactivate", request_data, access_token=tok ) @@ -715,7 +711,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) # Get user @@ -725,7 +723,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_delete_email(self) -> None: @@ -747,7 +745,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): {"medium": "email", "address": self.email}, access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # Get user channel = self.make_request( @@ -756,7 +754,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_delete_email_if_disabled(self) -> None: @@ -781,7 +779,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) # Get user @@ -791,7 +791,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual(self.email, channel.json_body["threepids"][0]["address"]) @@ -817,7 +817,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.THREEPID_AUTH_FAILED, channel.json_body["errcode"]) # Get user @@ -827,7 +829,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_no_valid_token(self) -> None: @@ -852,7 +854,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.THREEPID_AUTH_FAILED, channel.json_body["errcode"]) # Get user @@ -862,7 +866,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) @override_config({"next_link_domain_whitelist": None}) @@ -872,7 +876,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="https://example.com/a/good/site", - expect_code=200, + expect_code=HTTPStatus.OK, ) @override_config({"next_link_domain_whitelist": None}) @@ -884,7 +888,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="some-protocol://abcdefghijklmopqrstuvwxyz", - expect_code=200, + expect_code=HTTPStatus.OK, ) @override_config({"next_link_domain_whitelist": None}) @@ -895,7 +899,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="file:///host/path", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) @override_config({"next_link_domain_whitelist": ["example.com", "example.org"]}) @@ -907,28 +911,28 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link=None, - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://example.com/some/good/page", - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://example.org/some/also/good/page", - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://bad.example.org/some/bad/page", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) @override_config({"next_link_domain_whitelist": []}) @@ -940,7 +944,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="https://example.com/a/page", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) def _request_token( @@ -948,8 +952,8 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): email: str, client_secret: str, next_link: Optional[str] = None, - expect_code: int = 200, - ) -> str: + expect_code: int = HTTPStatus.OK, + ) -> Optional[str]: """Request a validation token to add an email address to a user's account Args: @@ -959,7 +963,8 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): expect_code: Expected return code of the call Returns: - The ID of the new threepid validation session + The ID of the new threepid validation session, or None if the response + did not contain a session ID. """ body = {"client_secret": client_secret, "email": email, "send_attempt": 1} if next_link: @@ -992,7 +997,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): b"account/3pid/email/requestToken", {"client_secret": client_secret, "email": email, "send_attempt": 1}, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(expected_errcode, channel.json_body["errcode"]) self.assertEqual(expected_error, channel.json_body["error"]) @@ -1001,7 +1008,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): path = link.replace("https://example.com", "") channel = self.make_request("GET", path, shorthand=False) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) def _get_link_from_email(self) -> str: assert self.email_attempts, "No emails have been sent" @@ -1051,7 +1058,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # Get user channel = self.make_request( @@ -1060,7 +1067,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) threepids = {threepid["address"] for threepid in channel.json_body["threepids"]} @@ -1091,7 +1098,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): """Tests that not providing any MXID raises an error.""" self._test_status( users=None, - expected_status_code=400, + expected_status_code=HTTPStatus.BAD_REQUEST, expected_errcode=Codes.MISSING_PARAM, ) @@ -1099,7 +1106,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): """Tests that providing an invalid MXID raises an error.""" self._test_status( users=["bad:test"], - expected_status_code=400, + expected_status_code=HTTPStatus.BAD_REQUEST, expected_errcode=Codes.INVALID_PARAM, ) @@ -1285,7 +1292,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): def _test_status( self, users: Optional[List[str]], - expected_status_code: int = 200, + expected_status_code: int = HTTPStatus.OK, expected_statuses: Optional[Dict[str, Dict[str, bool]]] = None, expected_failures: Optional[List[str]] = None, expected_errcode: Optional[str] = None, diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index aca03afd0e..7a88aa2cda 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor +from synapse.appservice import ApplicationService from synapse.rest import admin from synapse.rest.client import directory, login, room from synapse.server import HomeServer @@ -96,8 +96,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): # We use deliberately a localpart under the length threshold so # that we can make sure that the check is done on the whole alias. - data = {"room_alias_name": random_string(256 - len(self.hs.hostname))} - request_data = json.dumps(data) + request_data = {"room_alias_name": random_string(256 - len(self.hs.hostname))} channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) @@ -109,8 +108,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): # Check with an alias of allowed length. There should already be # a test that ensures it works in test_register.py, but let's be # as cautious as possible here. - data = {"room_alias_name": random_string(5)} - request_data = json.dumps(data) + request_data = {"room_alias_name": random_string(5)} channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) @@ -129,6 +127,38 @@ class DirectoryTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + def test_deleting_alias_via_directory_appservice(self) -> None: + user_id = "@as:test" + as_token = "i_am_an_app_service" + + appservice = ApplicationService( + as_token, + id="1234", + namespaces={"aliases": [{"regex": "#asns-*", "exclusive": True}]}, + sender=user_id, + ) + self.hs.get_datastores().main.services_cache.append(appservice) + + # Add an alias for the room, as the appservice + alias = RoomAlias(f"asns-{random_string(5)}", self.hs.hostname).to_string() + request_data = {"room_id": self.room_id} + + channel = self.make_request( + "PUT", + f"/_matrix/client/r0/directory/room/{alias}", + request_data, + access_token=as_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + # Then try to remove the alias, as the appservice + channel = self.make_request( + "DELETE", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=as_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + def test_deleting_nonexistant_alias(self) -> None: # Check that no alias exists alias = "#potato:test" @@ -159,8 +189,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): self.hs.hostname, ) - data = {"aliases": [self.random_alias(alias_length)]} - request_data = json.dumps(data) + request_data = {"aliases": [self.random_alias(alias_length)]} channel = self.make_request( "PUT", url, request_data, access_token=self.user_tok @@ -172,8 +201,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): ) -> str: alias = self.random_alias(alias_length) url = "/_matrix/client/r0/directory/room/%s" % alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.user_tok @@ -181,6 +209,19 @@ class DirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, expected_code, channel.result) return alias + def test_invalid_alias(self) -> None: + alias = "#potato" + channel = self.make_request( + "GET", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=self.user_tok, + ) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) + self.assertIn("error", channel.json_body, channel.json_body) + self.assertEqual( + channel.json_body["errcode"], "M_INVALID_PARAM", channel.json_body + ) + def random_alias(self, length: int) -> str: return RoomAlias(random_string(length), self.hs.hostname).to_string() diff --git a/tests/rest/client/test_identity.py b/tests/rest/client/test_identity.py index 299b9d21e2..dc17c9d113 100644 --- a/tests/rest/client/test_identity.py +++ b/tests/rest/client/test_identity.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor @@ -51,12 +50,11 @@ class IdentityTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, HTTPStatus.OK, channel.result) room_id = channel.json_body["room_id"] - params = { + request_data = { "id_server": "testis", "medium": "email", "address": "test@example.com", } - request_data = json.dumps(params) request_url = ("/rooms/%s/invite" % (room_id)).encode("ascii") channel = self.make_request( b"POST", request_url, request_data, access_token=tok diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index f4ea1209d9..a2958f6959 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import time import urllib.parse -from typing import Any, Dict, List, Optional, Union +from http import HTTPStatus +from typing import Any, Dict, List, Optional from unittest.mock import Mock from urllib.parse import urlencode @@ -41,7 +41,7 @@ from tests.test_utils.html_parsers import TestHtmlParser from tests.unittest import HomeserverTestCase, override_config, skip_unless try: - import jwt + from authlib.jose import jwk, jwt HAS_JWT = True except ImportError: @@ -261,20 +261,20 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): } channel = self.make_request(b"POST", LOGIN_URL, params) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) access_token = channel.json_body["access_token"] device_id = channel.json_body["device_id"] # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -288,7 +288,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # more requests with the expired token should still return a soft-logout self.reactor.advance(3600) channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -296,7 +296,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self._delete_device(access_token_2, "kermit", "monkey", device_id) channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], False) @@ -307,7 +307,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): channel = self.make_request( b"DELETE", "devices/" + device_id, access_token=access_token ) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) # check it's a UI-Auth fail self.assertEqual( set(channel.json_body.keys()), @@ -330,7 +330,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): access_token=access_token, content={"auth": auth}, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) @override_config({"session_lifetime": "24h"}) def test_session_can_hard_logout_after_being_soft_logged_out(self) -> None: @@ -341,14 +341,14 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -367,14 +367,14 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -399,7 +399,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", "/_matrix/client/v3/login", - json.dumps(body).encode("utf8"), + body, custom_headers=None, ) @@ -466,7 +466,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): def test_get_login_flows(self) -> None: """GET /login should return password and SSO flows""" channel = self.make_request("GET", "/_matrix/client/r0/login") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) expected_flow_types = [ "m.login.cas", @@ -494,14 +494,14 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): """/login/sso/redirect should redirect to an identity picker""" # first hit the redirect url, which should redirect to our idp picker channel = self._make_sso_redirect_request(None) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers uri = location_headers[0] # hitting that picker should give us some HTML channel = self.make_request("GET", uri) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # parse the form to check it has fields assumed elsewhere in this class html = channel.result["body"].decode("utf-8") @@ -530,7 +530,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + "&idp=cas", shorthand=False, ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers cas_uri = location_headers[0] @@ -555,7 +555,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL) + "&idp=saml", ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers saml_uri = location_headers[0] @@ -579,7 +579,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL) + "&idp=oidc", ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers oidc_uri = location_headers[0] @@ -606,7 +606,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): channel = self.helper.complete_oidc_auth(oidc_uri, cookies, {"sub": "user1"}) # that should serve a confirmation page - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) content_type_headers = channel.headers.getRawHeaders("Content-Type") assert content_type_headers self.assertTrue(content_type_headers[-1].startswith("text/html")) @@ -634,7 +634,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): "/login", content={"type": "m.login.token", "token": login_token}, ) - self.assertEqual(chan.code, 200, chan.result) + self.assertEqual(chan.code, HTTPStatus.OK, chan.result) self.assertEqual(chan.json_body["user_id"], "@user1:test") def test_multi_sso_redirect_to_unknown(self) -> None: @@ -643,18 +643,18 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): "GET", "/_synapse/client/pick_idp?redirectUrl=http://x&idp=xyz", ) - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) def test_client_idp_redirect_to_unknown(self) -> None: """If the client tries to pick an unknown IdP, return a 404""" channel = self._make_sso_redirect_request("xxx") - self.assertEqual(channel.code, 404, channel.result) + self.assertEqual(channel.code, HTTPStatus.NOT_FOUND, channel.result) self.assertEqual(channel.json_body["errcode"], "M_NOT_FOUND") def test_client_idp_redirect_to_oidc(self) -> None: """If the client pick a known IdP, redirect to it""" channel = self._make_sso_redirect_request("oidc") - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers oidc_uri = location_headers[0] @@ -765,7 +765,7 @@ class CASTestCase(unittest.HomeserverTestCase): channel = self.make_request("GET", cas_ticket_url) # Test that the response is HTML. - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) content_type_header_value = "" for header in channel.result.get("headers", []): if header[0] == b"Content-Type": @@ -841,7 +841,7 @@ class CASTestCase(unittest.HomeserverTestCase): self.assertIn(b"SSO account deactivated", channel.result["body"]) -@skip_unless(HAS_JWT, "requires jwt") +@skip_unless(HAS_JWT, "requires authlib") class JWTTestCase(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -866,11 +866,9 @@ class JWTTestCase(unittest.HomeserverTestCase): return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_secret) -> str: - # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. - result: Union[str, bytes] = jwt.encode(payload, secret, self.jwt_algorithm) - if isinstance(result, bytes): - return result.decode("ascii") - return result + header = {"alg": self.jwt_algorithm} + result: bytes = jwt.encode(header, payload, secret) + return result.decode("ascii") def jwt_login(self, *args: Any) -> FakeChannel: params = {"type": "org.matrix.login.jwt", "token": self.jwt_encode(*args)} @@ -902,7 +900,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Signature has expired" + channel.json_body["error"], + "JWT validation failed: expired_token: The token is expired", ) def test_login_jwt_not_before(self) -> None: @@ -912,7 +911,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - "JWT validation failed: The token is not yet valid (nbf)", + "JWT validation failed: invalid_token: The token is not valid yet", ) def test_login_no_sub(self) -> None: @@ -934,7 +933,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid issuer" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "iss"', ) # Not providing an issuer. @@ -943,7 +943,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - 'JWT validation failed: Token is missing the "iss" claim', + 'JWT validation failed: missing_claim: Missing "iss" claim', ) def test_login_iss_no_config(self) -> None: @@ -965,7 +965,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid audience" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "aud"', ) # Not providing an audience. @@ -974,7 +975,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - 'JWT validation failed: Token is missing the "aud" claim', + 'JWT validation failed: missing_claim: Missing "aud" claim', ) def test_login_aud_no_config(self) -> None: @@ -983,7 +984,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid audience" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "aud"', ) def test_login_default_sub(self) -> None: @@ -1010,7 +1012,7 @@ class JWTTestCase(unittest.HomeserverTestCase): # The JWTPubKeyTestCase is a complement to JWTTestCase where we instead use # RSS256, with a public key configured in synapse as "jwt_secret", and tokens # signed by the private key. -@skip_unless(HAS_JWT, "requires jwt") +@skip_unless(HAS_JWT, "requires authlib") class JWTPubKeyTestCase(unittest.HomeserverTestCase): servlets = [ login.register_servlets, @@ -1071,11 +1073,11 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase): return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str: - # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. - result: Union[bytes, str] = jwt.encode(payload, secret, "RS256") - if isinstance(result, bytes): - return result.decode("ascii") - return result + header = {"alg": "RS256"} + if secret.startswith("-----BEGIN RSA PRIVATE KEY-----"): + secret = jwk.dumps(secret, kty="RSA") + result: bytes = jwt.encode(header, payload, secret) + return result.decode("ascii") def jwt_login(self, *args: Any) -> FakeChannel: params = {"type": "org.matrix.login.jwt", "token": self.jwt_encode(*args)} @@ -1244,7 +1246,7 @@ class UsernamePickerTestCase(HomeserverTestCase): ) # that should redirect to the username picker - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers picker_url = location_headers[0] @@ -1288,7 +1290,7 @@ class UsernamePickerTestCase(HomeserverTestCase): ("Content-Length", str(len(content))), ], ) - self.assertEqual(chan.code, 302, chan.result) + self.assertEqual(chan.code, HTTPStatus.FOUND, chan.result) location_headers = chan.headers.getRawHeaders("Location") assert location_headers @@ -1298,7 +1300,7 @@ class UsernamePickerTestCase(HomeserverTestCase): path=location_headers[0], custom_headers=[("Cookie", "username_mapping_session=" + session_id)], ) - self.assertEqual(chan.code, 302, chan.result) + self.assertEqual(chan.code, HTTPStatus.FOUND, chan.result) location_headers = chan.headers.getRawHeaders("Location") assert location_headers @@ -1323,5 +1325,5 @@ class UsernamePickerTestCase(HomeserverTestCase): "/login", content={"type": "m.login.token", "token": login_token}, ) - self.assertEqual(chan.code, 200, chan.result) + self.assertEqual(chan.code, HTTPStatus.OK, chan.result) self.assertEqual(chan.json_body["user_id"], "@bobby:test") diff --git a/tests/rest/client/test_password_policy.py b/tests/rest/client/test_password_policy.py index 3a74d2e96c..e19d21d6ee 100644 --- a/tests/rest/client/test_password_policy.py +++ b/tests/rest/client/test_password_policy.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor @@ -89,7 +88,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_too_short(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "shorty"}) + request_data = {"username": "kermit", "password": "shorty"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -100,7 +99,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_digit(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "longerpassword"}) + request_data = {"username": "kermit", "password": "longerpassword"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -111,7 +110,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_symbol(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword"}) + request_data = {"username": "kermit", "password": "l0ngerpassword"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -122,7 +121,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_uppercase(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword!"}) + request_data = {"username": "kermit", "password": "l0ngerpassword!"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -133,7 +132,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_lowercase(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "L0NGERPASSWORD!"}) + request_data = {"username": "kermit", "password": "L0NGERPASSWORD!"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -144,7 +143,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_compliant(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "L0ngerpassword!"}) + request_data = {"username": "kermit", "password": "L0ngerpassword!"} channel = self.make_request("POST", self.register_url, request_data) # Getting a 401 here means the password has passed validation and the server has @@ -161,16 +160,14 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): user_id = self.register_user("kermit", compliant_password) tok = self.login("kermit", compliant_password) - request_data = json.dumps( - { - "new_password": not_compliant_password, - "auth": { - "password": compliant_password, - "type": LoginType.PASSWORD, - "user": user_id, - }, - } - ) + request_data = { + "new_password": not_compliant_password, + "auth": { + "password": compliant_password, + "type": LoginType.PASSWORD, + "user": user_id, + }, + } channel = self.make_request( "POST", "/_matrix/client/r0/account/password", diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py index 29bed0e872..8de5a342ae 100644 --- a/tests/rest/client/test_profile.py +++ b/tests/rest/client/test_profile.py @@ -153,18 +153,22 @@ class ProfileTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 400, channel.result) - def _get_displayname(self, name: Optional[str] = None) -> str: + def _get_displayname(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/displayname" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) - return channel.json_body["displayname"] + # FIXME: If a user has no displayname set, Synapse returns 200 and omits a + # displayname from the response. This contradicts the spec, see #13137. + return channel.json_body.get("displayname") - def _get_avatar_url(self, name: Optional[str] = None) -> str: + def _get_avatar_url(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/avatar_url" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) + # FIXME: If a user has no avatar set, Synapse returns 200 and omits an + # avatar_url from the response. This contradicts the spec, see #13137. return channel.json_body.get("avatar_url") @unittest.override_config({"max_avatar_size": 50}) diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index afb08b2736..071b488cc0 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import datetime -import json import os from typing import Any, Dict, List, Tuple @@ -62,9 +61,10 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) self.hs.get_datastores().main.services_cache.append(appservice) - request_data = json.dumps( - {"username": "as_user_kermit", "type": APP_SERVICE_REGISTRATION_TYPE} - ) + request_data = { + "username": "as_user_kermit", + "type": APP_SERVICE_REGISTRATION_TYPE, + } channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data @@ -85,7 +85,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) self.hs.get_datastores().main.services_cache.append(appservice) - request_data = json.dumps({"username": "as_user_kermit"}) + request_data = {"username": "as_user_kermit"} channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data @@ -95,9 +95,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): def test_POST_appservice_registration_invalid(self) -> None: self.appservice = None # no application service exists - request_data = json.dumps( - {"username": "kermit", "type": APP_SERVICE_REGISTRATION_TYPE} - ) + request_data = {"username": "kermit", "type": APP_SERVICE_REGISTRATION_TYPE} channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data ) @@ -105,14 +103,14 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"401", channel.result) def test_POST_bad_password(self) -> None: - request_data = json.dumps({"username": "kermit", "password": 666}) + request_data = {"username": "kermit", "password": 666} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"400", channel.result) self.assertEqual(channel.json_body["error"], "Invalid password") def test_POST_bad_username(self) -> None: - request_data = json.dumps({"username": 777, "password": "monkey"}) + request_data = {"username": 777, "password": "monkey"} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"400", channel.result) @@ -121,13 +119,12 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): def test_POST_user_valid(self) -> None: user_id = "@kermit:test" device_id = "frogfone" - params = { + request_data = { "username": "kermit", "password": "monkey", "device_id": device_id, "auth": {"type": LoginType.DUMMY}, } - request_data = json.dumps(params) channel = self.make_request(b"POST", self.url, request_data) det_data = { @@ -140,7 +137,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): @override_config({"enable_registration": False}) def test_POST_disabled_registration(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "monkey"}) + request_data = {"username": "kermit", "password": "monkey"} self.auth_result = (None, {"username": "kermit", "password": "monkey"}, None) channel = self.make_request(b"POST", self.url, request_data) @@ -188,13 +185,12 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}}) def test_POST_ratelimiting(self) -> None: for i in range(0, 6): - params = { + request_data = { "username": "kermit" + str(i), "password": "monkey", "device_id": "frogfone", "auth": {"type": LoginType.DUMMY}, } - request_data = json.dumps(params) channel = self.make_request(b"POST", self.url, request_data) if i == 5: @@ -234,7 +230,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): } # Request without auth to get flows and session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) flows = channel.json_body["flows"] # Synapse adds a dummy stage to differentiate flows where otherwise one @@ -251,8 +247,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - request_data = json.dumps(params) - channel = self.make_request(b"POST", self.url, request_data) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) completed = channel.json_body["completed"] self.assertCountEqual([LoginType.REGISTRATION_TOKEN], completed) @@ -262,8 +257,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "type": LoginType.DUMMY, "session": session, } - request_data = json.dumps(params) - channel = self.make_request(b"POST", self.url, request_data) + channel = self.make_request(b"POST", self.url, params) det_data = { "user_id": f"@{username}:{self.hs.hostname}", "home_server": self.hs.hostname, @@ -290,7 +284,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "password": "monkey", } # Request without auth to get session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Test with token param missing (invalid) @@ -298,21 +292,21 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "type": LoginType.REGISTRATION_TOKEN, "session": session, } - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.MISSING_PARAM) self.assertEqual(channel.json_body["completed"], []) # Test with non-string (invalid) params["auth"]["token"] = 1234 - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.INVALID_PARAM) self.assertEqual(channel.json_body["completed"], []) # Test with unknown token (invalid) params["auth"]["token"] = "1234" - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -337,9 +331,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): params1: JsonDict = {"username": "bert", "password": "monkey"} params2: JsonDict = {"username": "ernie", "password": "monkey"} # Do 2 requests without auth to get two session IDs - channel1 = self.make_request(b"POST", self.url, json.dumps(params1)) + channel1 = self.make_request(b"POST", self.url, params1) session1 = channel1.json_body["session"] - channel2 = self.make_request(b"POST", self.url, json.dumps(params2)) + channel2 = self.make_request(b"POST", self.url, params2) session2 = channel2.json_body["session"] # Use token with session1 and check `pending` is 1 @@ -348,9 +342,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session1, } - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Repeat request to make sure pending isn't increased again - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) pending = self.get_success( store.db_pool.simple_select_one_onecol( "registration_tokens", @@ -366,14 +360,14 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session2, } - channel = self.make_request(b"POST", self.url, json.dumps(params2)) + channel = self.make_request(b"POST", self.url, params2) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) # Complete registration with session1 params1["auth"]["type"] = LoginType.DUMMY - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Check pending=0 and completed=1 res = self.get_success( store.db_pool.simple_select_one( @@ -386,7 +380,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(res["completed"], 1) # Check auth still fails when using token with session2 - channel = self.make_request(b"POST", self.url, json.dumps(params2)) + channel = self.make_request(b"POST", self.url, params2) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -411,7 +405,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) params: JsonDict = {"username": "kermit", "password": "monkey"} # Request without auth to get session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Check authentication fails with expired token @@ -420,7 +414,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -435,7 +429,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) # Check authentication succeeds - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) completed = channel.json_body["completed"] self.assertCountEqual([LoginType.REGISTRATION_TOKEN], completed) @@ -460,9 +454,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): # Do 2 requests without auth to get two session IDs params1: JsonDict = {"username": "bert", "password": "monkey"} params2: JsonDict = {"username": "ernie", "password": "monkey"} - channel1 = self.make_request(b"POST", self.url, json.dumps(params1)) + channel1 = self.make_request(b"POST", self.url, params1) session1 = channel1.json_body["session"] - channel2 = self.make_request(b"POST", self.url, json.dumps(params2)) + channel2 = self.make_request(b"POST", self.url, params2) session2 = channel2.json_body["session"] # Use token with both sessions @@ -471,18 +465,18 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session1, } - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) params2["auth"] = { "type": LoginType.REGISTRATION_TOKEN, "token": token, "session": session2, } - self.make_request(b"POST", self.url, json.dumps(params2)) + self.make_request(b"POST", self.url, params2) # Complete registration with session1 params1["auth"]["type"] = LoginType.DUMMY - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Check `result` of registration token stage for session1 is `True` result1 = self.get_success( @@ -550,7 +544,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): # Do request without auth to get a session ID params: JsonDict = {"username": "kermit", "password": "monkey"} - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Use token @@ -559,7 +553,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - self.make_request(b"POST", self.url, json.dumps(params)) + self.make_request(b"POST", self.url, params) # Delete token self.get_success( @@ -592,9 +586,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "require_at_registration": True, }, "account_threepid_delegates": { - "email": "https://id_server", "msisdn": "https://id_server", }, + "email": {"notif_from": "Synapse <synapse@example.com>"}, } ) def test_advertised_flows_captcha_and_terms_and_3pids(self) -> None: @@ -827,8 +821,7 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = {"user_id": user_id} - request_data = json.dumps(params) + request_data = {"user_id": user_id} channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -845,12 +838,11 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = { + request_data = { "user_id": user_id, "expiration_ts": 0, "enable_renewal_emails": False, } - request_data = json.dumps(params) channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -870,12 +862,11 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = { + request_data = { "user_id": user_id, "expiration_ts": 0, "enable_renewal_emails": False, } - request_data = json.dumps(params) channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -1041,16 +1032,14 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): (user_id, tok) = self.create_user() - request_data = json.dumps( - { - "auth": { - "type": "m.login.password", - "user": user_id, - "password": "monkey", - }, - "erase": False, - } - ) + request_data = { + "auth": { + "type": "m.login.password", + "user": user_id, + "password": "monkey", + }, + "erase": False, + } channel = self.make_request( "POST", "account/deactivate", request_data, access_token=tok ) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index aa84906548..ad03eee17b 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -800,7 +800,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase): ) expected_event_ids.append(channel.json_body["event_id"]) - prev_token = "" + prev_token: Optional[str] = "" found_event_ids: List[str] = [] for _ in range(20): from_token = "" diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py index 20a259fc43..ad0d0209f7 100644 --- a/tests/rest/client/test_report_event.py +++ b/tests/rest/client/test_report_event.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json - from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin @@ -77,10 +75,7 @@ class ReportEventTestCase(unittest.HomeserverTestCase): def _assert_status(self, response_status: int, data: JsonDict) -> None: channel = self.make_request( - "POST", - self.report_path, - json.dumps(data), - access_token=self.other_user_tok, + "POST", self.report_path, data, access_token=self.other_user_tok ) self.assertEqual( response_status, int(channel.result["code"]), msg=channel.result["body"] diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 35c59ee9e0..c45cb32090 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,11 +18,12 @@ """Tests REST events for /rooms paths.""" import json -from typing import Any, Dict, Iterable, List, Optional, Union +from http import HTTPStatus +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse -# `Literal` appears with Python 3.8. +from parameterized import param, parameterized from typing_extensions import Literal from twisted.test.proto_helpers import MemoryReactor @@ -33,7 +34,9 @@ from synapse.api.constants import ( EventContentFields, EventTypes, Membership, + PublicRoomsFilterFields, RelationTypes, + RoomTypes, ) from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus @@ -102,7 +105,7 @@ class RoomPermissionsTestCase(RoomBase): channel = self.make_request( "PUT", self.created_rmid_msg_path, b'{"msgtype":"m.text","body":"test msg"}' ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # set topic for public room channel = self.make_request( @@ -110,7 +113,7 @@ class RoomPermissionsTestCase(RoomBase): ("rooms/%s/state/m.room.topic" % self.created_public_rmid).encode("ascii"), b'{"topic":"Public Room Topic"}', ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # auth as user_id now self.helper.auth_user_id = self.user_id @@ -132,28 +135,28 @@ class RoomPermissionsTestCase(RoomBase): "/rooms/%s/send/m.room.message/mid2" % (self.uncreated_rmid,), msg_content, ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room not joined (no state), expect 403 channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room and invited, expect 403 self.helper.invite( room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room and joined, expect 200 self.helper.join(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # send message in created room and left, expect 403 self.helper.leave(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_topic_perms(self) -> None: topic_content = b'{"topic":"My Topic Name"}' @@ -163,28 +166,28 @@ class RoomPermissionsTestCase(RoomBase): channel = self.make_request( "PUT", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid, topic_content ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request( "GET", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set/get topic in created PRIVATE room not joined, expect 403 channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", topic_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set topic in created PRIVATE room and invited, expect 403 self.helper.invite( room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # get topic in created PRIVATE room and invited, expect 403 channel = self.make_request("GET", topic_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set/get topic in created PRIVATE room and joined, expect 200 self.helper.join(room=self.created_rmid, user=self.user_id) @@ -192,25 +195,25 @@ class RoomPermissionsTestCase(RoomBase): # Only room ops can set topic by default self.helper.auth_user_id = self.rmcreator_id channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.helper.auth_user_id = self.user_id channel = self.make_request("GET", topic_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(topic_content.decode("utf8")), channel.json_body) # set/get topic in created PRIVATE room and left, expect 403 self.helper.leave(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", topic_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # get topic in PUBLIC room, not joined, expect 403 channel = self.make_request( "GET", "/rooms/%s/state/m.room.topic" % self.created_public_rmid ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set topic in PUBLIC room, not joined, expect 403 channel = self.make_request( @@ -218,7 +221,7 @@ class RoomPermissionsTestCase(RoomBase): "/rooms/%s/state/m.room.topic" % self.created_public_rmid, topic_content, ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def _test_get_membership( self, room: str, members: Iterable = frozenset(), expect_code: int = 200 @@ -307,14 +310,14 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=self.rmcreator_id, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) self.helper.change_membership( room=room, src=self.user_id, targ=self.rmcreator_id, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) def test_joined_permissions(self) -> None: @@ -340,7 +343,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # set left of other, expect 403 @@ -349,7 +352,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # set left of self, expect 200 @@ -369,7 +372,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=usr, membership=Membership.INVITE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) self.helper.change_membership( @@ -377,7 +380,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=usr, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # It is always valid to LEAVE if you've already left (currently.) @@ -386,7 +389,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=self.rmcreator_id, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # tests the "from banned" line from the table in https://spec.matrix.org/unstable/client-server-api/#mroommember @@ -403,7 +406,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.BAN, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.FORBIDDEN, ) @@ -413,7 +416,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.BAN, - expect_code=200, + expect_code=HTTPStatus.OK, ) # from ban to invite: Must never happen. @@ -422,7 +425,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.INVITE, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -432,7 +435,7 @@ class RoomPermissionsTestCase(RoomBase): src=other, targ=other, membership=Membership.JOIN, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -442,7 +445,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.BAN, - expect_code=200, + expect_code=HTTPStatus.OK, ) # from ban to knock: Must never happen. @@ -451,7 +454,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.KNOCK, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -461,7 +464,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.LEAVE, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.FORBIDDEN, ) @@ -471,7 +474,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.LEAVE, - expect_code=200, + expect_code=HTTPStatus.OK, ) @@ -491,7 +494,7 @@ class RoomStateTestCase(RoomBase): "/rooms/%s/state" % room_id, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertCountEqual( [state_event["type"] for state_event in channel.json_body], { @@ -514,7 +517,7 @@ class RoomStateTestCase(RoomBase): "/rooms/%s/state/m.room.member/%s" % (room_id, self.user_id), ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(channel.json_body, {"membership": "join"}) @@ -528,16 +531,16 @@ class RoomsMemberListTestCase(RoomBase): def test_get_member_list(self) -> None: room_id = self.helper.create_room_as(self.user_id) channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_get_member_list_no_room(self) -> None: channel = self.make_request("GET", "/rooms/roomdoesnotexist/members") - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission(self) -> None: room_id = self.helper.create_room_as("@some_other_guy:red") channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_with_at_token(self) -> None: """ @@ -548,7 +551,7 @@ class RoomsMemberListTestCase(RoomBase): # first sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check that permission is denied for @sid1:red to get the @@ -557,7 +560,7 @@ class RoomsMemberListTestCase(RoomBase): "GET", f"/rooms/{room_id}/members?at={sync_token}", ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_former_member(self) -> None: """ @@ -570,14 +573,14 @@ class RoomsMemberListTestCase(RoomBase): # check that the user can see the member list to start with channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # ban the user self.helper.change_membership(room_id, "@alice:red", self.user_id, "ban") # check the user can no longer see the member list channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_former_member_with_at_token(self) -> None: """ @@ -591,14 +594,14 @@ class RoomsMemberListTestCase(RoomBase): # sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check that the user can see the member list to start with channel = self.make_request( "GET", "/rooms/%s/members?at=%s" % (room_id, sync_token) ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # ban the user (Note: the user is actually allowed to see this event and # state so that they know they're banned!) @@ -610,14 +613,14 @@ class RoomsMemberListTestCase(RoomBase): # now, with the original user, sync again to get a new at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check the user can no longer see the updated member list channel = self.make_request( "GET", "/rooms/%s/members?at=%s" % (room_id, sync_token) ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_mixed_memberships(self) -> None: room_creator = "@some_other_guy:red" @@ -626,17 +629,17 @@ class RoomsMemberListTestCase(RoomBase): self.helper.invite(room=room_id, src=room_creator, targ=self.user_id) # can't see list if you're just invited. channel = self.make_request("GET", room_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.helper.join(room=room_id, user=self.user_id) # can see list now joined channel = self.make_request("GET", room_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.helper.leave(room=room_id, user=self.user_id) # can see old list once left channel = self.make_request("GET", room_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_get_member_list_cancellation(self) -> None: """Test cancellation of a `/rooms/$room_id/members` request.""" @@ -649,7 +652,7 @@ class RoomsMemberListTestCase(RoomBase): "/rooms/%s/members" % room_id, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(len(channel.json_body["chunk"]), 1) self.assertLessEqual( { @@ -669,7 +672,7 @@ class RoomsMemberListTestCase(RoomBase): # first sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] channel = make_request_with_cancellation_test( @@ -680,7 +683,7 @@ class RoomsMemberListTestCase(RoomBase): "/rooms/%s/members?at=%s" % (room_id, sync_token), ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(len(channel.json_body["chunk"]), 1) self.assertLessEqual( { @@ -704,19 +707,34 @@ class RoomsCreateTestCase(RoomBase): # POST with no config keys, expect new room id channel = self.make_request("POST", "/createRoom", "{}") - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(44, channel.resource_usage.db_txn_count) + + def test_post_room_initial_state(self) -> None: + # POST with initial_state config key, expect new room id + channel = self.make_request( + "POST", + "/createRoom", + b'{"initial_state":[{"type": "m.bridge", "content": {}}]}', + ) + + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) + self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(50, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id channel = self.make_request("POST", "/createRoom", b'{"visibility":"private"}') - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_custom_key(self) -> None: # POST with custom config keys, expect new room id channel = self.make_request("POST", "/createRoom", b'{"custom":"stuff"}') - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_known_and_unknown_keys(self) -> None: @@ -724,16 +742,16 @@ class RoomsCreateTestCase(RoomBase): channel = self.make_request( "POST", "/createRoom", b'{"visibility":"private","custom":"things"}' ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_invalid_content(self) -> None: # POST with invalid content / paths, expect 400 channel = self.make_request("POST", "/createRoom", b'{"visibili') - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) channel = self.make_request("POST", "/createRoom", b'["hello"]') - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) def test_post_room_invitees_invalid_mxid(self) -> None: # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088 @@ -741,7 +759,7 @@ class RoomsCreateTestCase(RoomBase): channel = self.make_request( "POST", "/createRoom", b'{"invite":["@alice:example.com "]}' ) - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) @unittest.override_config({"rc_invites": {"per_room": {"burst_count": 3}}}) def test_post_room_invitees_ratelimit(self) -> None: @@ -752,20 +770,18 @@ class RoomsCreateTestCase(RoomBase): # Build the request's content. We use local MXIDs because invites over federation # are more difficult to mock. - content = json.dumps( - { - "invite": [ - "@alice1:red", - "@alice2:red", - "@alice3:red", - "@alice4:red", - ] - } - ).encode("utf8") + content = { + "invite": [ + "@alice1:red", + "@alice2:red", + "@alice3:red", + "@alice4:red", + ] + } # Test that the invites are correctly ratelimited. channel = self.make_request("POST", "/createRoom", content) - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) self.assertEqual( "Cannot invite so many users at once", channel.json_body["error"], @@ -778,7 +794,7 @@ class RoomsCreateTestCase(RoomBase): # Test that the invites aren't ratelimited anymore. channel = self.make_request("POST", "/createRoom", content) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) def test_spam_checker_may_join_room_deprecated(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly bypassed @@ -802,7 +818,7 @@ class RoomsCreateTestCase(RoomBase): "/createRoom", {}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) @@ -813,14 +829,14 @@ class RoomsCreateTestCase(RoomBase): In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`. """ - async def user_may_join_room( + async def user_may_join_room_codes( mxid: str, room_id: str, is_invite: bool, ) -> Codes: return Codes.CONSENT_NOT_GIVEN - join_mock = Mock(side_effect=user_may_join_room) + join_mock = Mock(side_effect=user_may_join_room_codes) self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock) channel = self.make_request( @@ -828,10 +844,29 @@ class RoomsCreateTestCase(RoomBase): "/createRoom", {}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) + # Now change the return value of the callback to deny any join. Since we're + # creating the room, despite the return value, we should be able to join. + async def user_may_join_room_tuple( + mxid: str, + room_id: str, + is_invite: bool, + ) -> Tuple[Codes, dict]: + return Codes.INCOMPATIBLE_ROOM_VERSION, {} + + join_mock.side_effect = user_may_join_room_tuple + + channel = self.make_request( + "POST", + "/createRoom", + {}, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) + self.assertEqual(join_mock.call_count, 0) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" @@ -846,54 +881,68 @@ class RoomTopicTestCase(RoomBase): def test_invalid_puts(self) -> None: # missing keys or invalid json channel = self.make_request("PUT", self.path, "{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, '{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, '{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request( "PUT", self.path, '[{"_name":"bo"},{"_name":"jill"}]' ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, "text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, "") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # valid key, wrong type content = '{"topic":["Topic name"]}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_topic(self) -> None: # nothing should be there channel = self.make_request("GET", self.path) - self.assertEqual(404, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.result["body"]) # valid put content = '{"topic":"Topic name"}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # valid get channel = self.make_request("GET", self.path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(content), channel.json_body) def test_rooms_topic_with_extra_keys(self) -> None: # valid put with extra keys content = '{"topic":"Seasons","subtopic":"Summer"}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # valid get channel = self.make_request("GET", self.path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(content), channel.json_body) @@ -909,22 +958,34 @@ class RoomMemberStateTestCase(RoomBase): path = "/rooms/%s/state/m.room.member/%s" % (self.room_id, self.user_id) # missing keys or invalid json channel = self.make_request("PUT", path, "{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, '{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, '{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'[{"_name":"bo"},{"_name":"jill"}]') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, "text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, "") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # valid keys, wrong types content = '{"membership":["%s","%s","%s"]}' % ( @@ -933,7 +994,9 @@ class RoomMemberStateTestCase(RoomBase): Membership.LEAVE, ) channel = self.make_request("PUT", path, content.encode("ascii")) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_members_self(self) -> None: path = "/rooms/%s/state/m.room.member/%s" % ( @@ -944,10 +1007,10 @@ class RoomMemberStateTestCase(RoomBase): # valid join message (NOOP since we made the room) content = '{"membership":"%s"}' % Membership.JOIN channel = self.make_request("PUT", path, content.encode("ascii")) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) expected_response = {"membership": Membership.JOIN} self.assertEqual(expected_response, channel.json_body) @@ -962,10 +1025,10 @@ class RoomMemberStateTestCase(RoomBase): # valid invite message content = '{"membership":"%s"}' % Membership.INVITE channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(json.loads(content), channel.json_body) def test_rooms_members_other_custom_keys(self) -> None: @@ -981,10 +1044,10 @@ class RoomMemberStateTestCase(RoomBase): "Join us!", ) channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(json.loads(content), channel.json_body) @@ -1101,7 +1164,9 @@ class RoomJoinTestCase(RoomBase): # Now make the callback deny all room joins, and check that a join actually fails. return_value = False - self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + self.helper.join( + self.room3, self.user2, expect_code=HTTPStatus.FORBIDDEN, tok=self.tok2 + ) def test_spam_checker_may_join_room(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly called @@ -1111,13 +1176,15 @@ class RoomJoinTestCase(RoomBase): """ # Register a dummy callback. Make it allow all room joins for now. - return_value: Union[Literal["NOT_SPAM"], Codes] = synapse.module_api.NOT_SPAM + return_value: Union[ + Literal["NOT_SPAM"], Tuple[Codes, dict], Codes + ] = synapse.module_api.NOT_SPAM async def user_may_join_room( userid: str, room_id: str, is_invited: bool, - ) -> Union[Literal["NOT_SPAM"], Codes]: + ) -> Union[Literal["NOT_SPAM"], Tuple[Codes, dict], Codes]: return return_value # `spec` argument is needed for this function mock to have `__qualname__`, which @@ -1161,8 +1228,28 @@ class RoomJoinTestCase(RoomBase): ) # Now make the callback deny all room joins, and check that a join actually fails. + # We pick an arbitrary Codes rather than the default `Codes.FORBIDDEN`. return_value = Codes.CONSENT_NOT_GIVEN - self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + self.helper.invite(self.room3, self.user1, self.user2, tok=self.tok1) + self.helper.join( + self.room3, + self.user2, + expect_code=HTTPStatus.FORBIDDEN, + expect_errcode=return_value, + tok=self.tok2, + ) + + # Now make the callback deny all room joins, and check that a join actually fails. + # As above, with the experimental extension that lets us return dictionaries. + return_value = (Codes.BAD_ALIAS, {"another_field": "12345"}) + self.helper.join( + self.room3, + self.user2, + expect_code=HTTPStatus.FORBIDDEN, + expect_errcode=return_value[0], + tok=self.tok2, + expect_additional_fields=return_value[1], + ) class RoomJoinRatelimitTestCase(RoomBase): @@ -1212,7 +1299,7 @@ class RoomJoinRatelimitTestCase(RoomBase): # Update the display name for the user. path = "/_matrix/client/r0/profile/%s/displayname" % self.user_id channel = self.make_request("PUT", path, {"displayname": "John Doe"}) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # Check that all the rooms have been sent a profile update into. for room_id in room_ids: @@ -1277,40 +1364,153 @@ class RoomMessagesTestCase(RoomBase): path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) # missing keys or invalid json channel = self.make_request("PUT", path, b"{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'[{"_name":"bo"},{"_name":"jill"}]') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b"text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b"") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_messages_sent(self) -> None: path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) content = b'{"body":"test","msgtype":{"type":"a"}}' channel = self.make_request("PUT", path, content) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # custom message types content = b'{"body":"test","msgtype":"test.custom.text"}' channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # m.text message type path = "/rooms/%s/send/m.room.message/mid2" % (urlparse.quote(self.room_id)) content = b'{"body":"test2","msgtype":"m.text"}' channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) + + @parameterized.expand( + [ + # Allow + param( + name="NOT_SPAM", + value="NOT_SPAM", + expected_code=HTTPStatus.OK, + expected_fields={}, + ), + param( + name="False", + value=False, + expected_code=HTTPStatus.OK, + expected_fields={}, + ), + # Block + param( + name="scalene string", + value="ANY OTHER STRING", + expected_code=HTTPStatus.FORBIDDEN, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="True", + value=True, + expected_code=HTTPStatus.FORBIDDEN, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="Code", + value=Codes.LIMIT_EXCEEDED, + expected_code=HTTPStatus.FORBIDDEN, + expected_fields={"errcode": "M_LIMIT_EXCEEDED"}, + ), + param( + name="Tuple", + value=(Codes.SERVER_NOT_TRUSTED, {"additional_field": "12345"}), + expected_code=HTTPStatus.FORBIDDEN, + expected_fields={ + "errcode": "M_SERVER_NOT_TRUSTED", + "additional_field": "12345", + }, + ), + ] + ) + def test_spam_checker_check_event_for_spam( + self, + name: str, + value: Union[str, bool, Codes, Tuple[Codes, JsonDict]], + expected_code: int, + expected_fields: dict, + ) -> None: + class SpamCheck: + mock_return_value: Union[ + str, bool, Codes, Tuple[Codes, JsonDict], bool + ] = "NOT_SPAM" + mock_content: Optional[JsonDict] = None + + async def check_event_for_spam( + self, + event: synapse.events.EventBase, + ) -> Union[str, Codes, Tuple[Codes, JsonDict], bool]: + self.mock_content = event.content + return self.mock_return_value + + spam_checker = SpamCheck() + + self.hs.get_spam_checker()._check_event_for_spam_callbacks.append( + spam_checker.check_event_for_spam + ) + + # Inject `value` as mock_return_value + spam_checker.mock_return_value = value + path = "/rooms/%s/send/m.room.message/check_event_for_spam_%s" % ( + urlparse.quote(self.room_id), + urlparse.quote(name), + ) + body = "test-%s" % name + content = '{"body":"%s","msgtype":"m.text"}' % body + channel = self.make_request("PUT", path, content) + + # Check that the callback has witnessed the correct event. + self.assertIsNotNone(spam_checker.mock_content) + if ( + spam_checker.mock_content is not None + ): # Checked just above, but mypy doesn't know about that. + self.assertEqual( + spam_checker.mock_content["body"], body, spam_checker.mock_content + ) + + # Check that we have the correct result. + self.assertEqual(expected_code, channel.code, msg=channel.result["body"]) + for expected_key, expected_value in expected_fields.items(): + self.assertEqual( + channel.json_body.get(expected_key, None), + expected_value, + "Field %s absent or invalid " % expected_key, + ) class RoomPowerLevelOverridesTestCase(RoomBase): @@ -1435,7 +1635,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am allowed - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_normal_user_can_not_post_state_event(self) -> None: # Given I am a normal member of a room @@ -1449,7 +1649,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed because state events require PL>=50 - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. " "user_level (0) < send_level (50)", @@ -1476,7 +1676,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am allowed - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) @unittest.override_config( { @@ -1504,7 +1704,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) @unittest.override_config( { @@ -1532,7 +1732,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. " + "user_level (0) < send_level (1)", @@ -1563,7 +1763,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): # Then I am not allowed because the public_chat config does not # affect this room, because this room is a private_chat - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. " + "user_level (0) < send_level (50)", @@ -1582,7 +1782,7 @@ class RoomInitialSyncTestCase(RoomBase): def test_initial_sync(self) -> None: channel = self.make_request("GET", "/rooms/%s/initialSync" % self.room_id) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertEqual(self.room_id, channel.json_body["room_id"]) self.assertEqual("join", channel.json_body["membership"]) @@ -1625,7 +1825,7 @@ class RoomMessageListTestCase(RoomBase): channel = self.make_request( "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("start" in channel.json_body) self.assertEqual(token, channel.json_body["start"]) self.assertTrue("chunk" in channel.json_body) @@ -1636,7 +1836,7 @@ class RoomMessageListTestCase(RoomBase): channel = self.make_request( "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("start" in channel.json_body) self.assertEqual(token, channel.json_body["start"]) self.assertTrue("chunk" in channel.json_body) @@ -1675,7 +1875,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 2, [event["content"] for event in chunk]) @@ -1703,7 +1903,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 1, [event["content"] for event in chunk]) @@ -1720,7 +1920,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 0, [event["content"] for event in chunk]) @@ -1848,14 +2048,98 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): def test_restricted_no_auth(self) -> None: channel = self.make_request("GET", self.url) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) def test_restricted_auth(self) -> None: self.register_user("user", "pass") tok = self.login("user", "pass") channel = self.make_request("GET", self.url, access_token=tok) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + +class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + + config = self.default_config() + config["allow_public_rooms_without_auth"] = True + config["experimental_features"] = {"msc3827_enabled": True} + self.hs = self.setup_test_homeserver(config=config) + self.url = b"/_matrix/client/r0/publicRooms" + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + user = self.register_user("alice", "pass") + self.token = self.login(user, "pass") + + # Create a room + self.helper.create_room_as( + user, + is_public=True, + extra_content={"visibility": "public"}, + tok=self.token, + ) + # Create a space + self.helper.create_room_as( + user, + is_public=True, + extra_content={ + "visibility": "public", + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE}, + }, + tok=self.token, + ) + + def make_public_rooms_request( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[List[Dict[str, Any]], int]: + channel = self.make_request( + "POST", + self.url, + {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}}, + self.token, + ) + chunk = channel.json_body["chunk"] + count = channel.json_body["total_room_count_estimate"] + + self.assertEqual(len(chunk), count) + + return chunk, count + + def test_returns_both_rooms_and_spaces_if_no_filter(self) -> None: + chunk, count = self.make_public_rooms_request(None) + + self.assertEqual(count, 2) + + def test_returns_only_rooms_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request([None]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), None) + + def test_returns_only_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space"]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), "m.space") + + def test_returns_both_rooms_and_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space", None]) + + self.assertEqual(count, 2) + + def test_returns_both_rooms_and_spaces_if_array_is_empty(self) -> None: + chunk, count = self.make_public_rooms_request([]) + + self.assertEqual(count, 2) class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): @@ -1882,7 +2166,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): "Simple test for searching rooms over federation" self.federation_client.get_public_rooms.return_value = make_awaitable({}) # type: ignore[attr-defined] - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", @@ -1890,7 +2174,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): content={"filter": search_filter}, access_token=self.token, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.federation_client.get_public_rooms.assert_called_once_with( # type: ignore[attr-defined] "testserv", @@ -1907,11 +2191,11 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): # The `get_public_rooms` should be called again if the first call fails # with a 404, when using search filters. self.federation_client.get_public_rooms.side_effect = ( # type: ignore[attr-defined] - HttpResponseException(404, "Not Found", b""), + HttpResponseException(HTTPStatus.NOT_FOUND, "Not Found", b""), make_awaitable({}), ) - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", @@ -1919,7 +2203,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): content={"filter": search_filter}, access_token=self.token, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.federation_client.get_public_rooms.assert_has_calls( # type: ignore[attr-defined] [ @@ -1965,21 +2249,19 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): # Set a profile for the test user self.displayname = "test user" - data = {"displayname": self.displayname} - request_data = json.dumps(data) + request_data = {"displayname": self.displayname} channel = self.make_request( "PUT", "/_matrix/client/r0/profile/%s/displayname" % (self.user_id,), request_data, access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok) def test_per_room_profile_forbidden(self) -> None: - data = {"membership": "join", "displayname": "other test user"} - request_data = json.dumps(data) + request_data = {"membership": "join", "displayname": "other test user"} channel = self.make_request( "PUT", "/_matrix/client/r0/rooms/%s/state/m.room.member/%s" @@ -1987,7 +2269,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): request_data, access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) event_id = channel.json_body["event_id"] channel = self.make_request( @@ -1995,7 +2277,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): "/_matrix/client/r0/rooms/%s/event/%s" % (self.room_id, event_id), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) res_displayname = channel.json_body["content"]["displayname"] self.assertEqual(res_displayname, self.displayname, channel.result) @@ -2029,7 +2311,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2043,7 +2325,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2057,7 +2339,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2071,7 +2353,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2083,7 +2365,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2095,7 +2377,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2114,7 +2396,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2126,7 +2408,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): ), access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) event_content = channel.json_body @@ -2174,7 +2456,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2204,7 +2486,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_NOT_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2239,7 +2521,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_LABELS_NOT_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2319,16 +2601,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): def test_search_filter_labels(self) -> None: """Test that we can filter by a label on a /search request.""" - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": self.FILTER_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -2356,16 +2636,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): def test_search_filter_not_labels(self) -> None: """Test that we can filter by the absence of a label on a /search request.""" - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_NOT_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": self.FILTER_NOT_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -2405,16 +2683,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): """Test that we can filter by both a label and the absence of another label on a /search request. """ - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_LABELS_NOT_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": self.FILTER_LABELS_NOT_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -2587,7 +2863,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): "/rooms/%s/messages?filter=%s&dir=b" % (self.room_id, json.dumps(filter)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) return channel.json_body["chunk"] @@ -2692,7 +2968,7 @@ class ContextTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2758,7 +3034,7 @@ class ContextTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id), access_token=invited_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2859,8 +3135,7 @@ class RoomAliasListTestCase(unittest.HomeserverTestCase): def _set_alias_via_directory(self, alias: str, expected_code: int = 200) -> None: url = "/_matrix/client/r0/directory/room/" + alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.room_owner_tok @@ -2889,8 +3164,7 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase): def _set_alias_via_directory(self, alias: str, expected_code: int = 200) -> None: url = "/_matrix/client/r0/directory/room/" + alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.room_owner_tok @@ -2916,7 +3190,7 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase): channel = self.make_request( "PUT", "rooms/%s/state/m.room.canonical_alias" % (self.room_id,), - json.dumps(content), + content, access_token=self.room_owner_tok, ) self.assertEqual(channel.code, expected_code, channel.result) @@ -3050,7 +3324,7 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Mock a few functions to prevent the test from failing due to failing to talk to # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. - make_invite_mock = Mock(return_value=make_awaitable(0)) + make_invite_mock = Mock(return_value=make_awaitable((Mock(event_id="abc"), 0))) self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock self.hs.get_identity_handler().lookup_3pid = Mock( return_value=make_awaitable(None), @@ -3111,7 +3385,7 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Mock a few functions to prevent the test from failing due to failing to talk to # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. - make_invite_mock = Mock(return_value=make_awaitable(0)) + make_invite_mock = Mock(return_value=make_awaitable((Mock(event_id="abc"), 0))) self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock self.hs.get_identity_handler().lookup_3pid = Mock( return_value=make_awaitable(None), @@ -3149,7 +3423,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): make_invite_mock.assert_called_once() # Now change the return value of the callback to deny any invite and test that - # we can't send the invite. + # we can't send the invite. We pick an arbitrary error code to be able to check + # that the same code has been returned mock.return_value = make_awaitable(Codes.CONSENT_NOT_GIVEN) channel = self.make_request( method="POST", @@ -3163,6 +3438,27 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): access_token=self.tok, ) self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.CONSENT_NOT_GIVEN) + + # Also check that it stopped before calling _make_and_store_3pid_invite. + make_invite_mock.assert_called_once() + + # Run variant with `Tuple[Codes, dict]`. + mock.return_value = make_awaitable((Codes.EXPIRED_ACCOUNT, {"field": "value"})) + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": email_to_invite, + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT) + self.assertEqual(channel.json_body["field"], "value") # Also check that it stopped before calling _make_and_store_3pid_invite. make_invite_mock.assert_called_once() diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index e3efd1f1b0..b085c50356 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -606,11 +606,10 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): self._check_unread_count(1) # Send a read receipt to tell the server we've read the latest event. - body = json.dumps({ReceiptTypes.READ: res["event_id"]}).encode("utf8") channel = self.make_request( "POST", f"/rooms/{self.room_id}/read_markers", - body, + {ReceiptTypes.READ: res["event_id"]}, access_token=self.tok, ) self.assertEqual(channel.code, 200, channel.json_body) diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 5eb0f243f7..9a48e9286f 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -21,7 +21,6 @@ from synapse.api.constants import EventTypes, LoginType, Membership from synapse.api.errors import SynapseError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase -from synapse.events.snapshot import EventContext from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.rest import admin from synapse.rest.client import account, login, profile, room @@ -113,14 +112,8 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): # Have this homeserver skip event auth checks. This is necessary due to # event auth checks ensuring that events were signed by the sender's homeserver. - async def _check_event_auth( - origin: str, - event: EventBase, - context: EventContext, - *args: Any, - **kwargs: Any, - ) -> EventContext: - return context + async def _check_event_auth(origin: Any, event: Any, context: Any) -> None: + pass hs.get_federation_event_handler()._check_event_auth = _check_event_auth # type: ignore[assignment] diff --git a/tests/rest/client/test_upgrade_room.py b/tests/rest/client/test_upgrade_room.py index 98c1039d33..5e7bf97482 100644 --- a/tests/rest/client/test_upgrade_room.py +++ b/tests/rest/client/test_upgrade_room.py @@ -48,10 +48,14 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.helper.join(self.room_id, self.other, tok=self.other_token) def _upgrade_room( - self, token: Optional[str] = None, room_id: Optional[str] = None + self, + token: Optional[str] = None, + room_id: Optional[str] = None, + expire_cache: bool = True, ) -> FakeChannel: - # We never want a cached response. - self.reactor.advance(5 * 60 + 1) + if expire_cache: + # We don't want a cached response. + self.reactor.advance(5 * 60 + 1) if room_id is None: room_id = self.room_id @@ -72,9 +76,24 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.assertEqual(200, channel.code, channel.result) self.assertIn("replacement_room", channel.json_body) - def test_not_in_room(self) -> None: + new_room_id = channel.json_body["replacement_room"] + + # Check that the tombstone event points to the new room. + tombstone_event = self.get_success( + self.hs.get_storage_controllers().state.get_current_state_event( + self.room_id, EventTypes.Tombstone, "" + ) + ) + self.assertIsNotNone(tombstone_event) + self.assertEqual(new_room_id, tombstone_event.content["replacement_room"]) + + # Check that the new room exists. + room = self.get_success(self.store.get_room(new_room_id)) + self.assertIsNotNone(room) + + def test_never_in_room(self) -> None: """ - Upgrading a room should work fine. + A user who has never been in the room cannot upgrade the room. """ # The user isn't in the room. roomless = self.register_user("roomless", "pass") @@ -83,6 +102,16 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): channel = self._upgrade_room(roomless_token) self.assertEqual(403, channel.code, channel.result) + def test_left_room(self) -> None: + """ + A user who is no longer in the room cannot upgrade the room. + """ + # Remove the user from the room. + self.helper.leave(self.room_id, self.creator, tok=self.creator_token) + + channel = self._upgrade_room(self.creator_token) + self.assertEqual(403, channel.code, channel.result) + def test_power_levels(self) -> None: """ Another user can upgrade the room if their power level is increased. @@ -297,3 +326,47 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.assertEqual( create_event.content.get(EventContentFields.ROOM_TYPE), test_room_type ) + + def test_second_upgrade_from_same_user(self) -> None: + """A second room upgrade from the same user is deduplicated.""" + channel1 = self._upgrade_room() + self.assertEqual(200, channel1.code, channel1.result) + + channel2 = self._upgrade_room(expire_cache=False) + self.assertEqual(200, channel2.code, channel2.result) + + self.assertEqual( + channel1.json_body["replacement_room"], + channel2.json_body["replacement_room"], + ) + + def test_second_upgrade_after_delay(self) -> None: + """A second room upgrade is not deduplicated after some time has passed.""" + channel1 = self._upgrade_room() + self.assertEqual(200, channel1.code, channel1.result) + + channel2 = self._upgrade_room(expire_cache=True) + self.assertEqual(200, channel2.code, channel2.result) + + self.assertNotEqual( + channel1.json_body["replacement_room"], + channel2.json_body["replacement_room"], + ) + + def test_second_upgrade_from_different_user(self) -> None: + """A second room upgrade from a different user is blocked.""" + channel = self._upgrade_room() + self.assertEqual(200, channel.code, channel.result) + + channel = self._upgrade_room(self.other_token, expire_cache=False) + self.assertEqual(400, channel.code, channel.result) + + def test_first_upgrade_does_not_block_second(self) -> None: + """A second room upgrade is not blocked when a previous upgrade attempt was not + allowed. + """ + channel = self._upgrade_room(self.other_token) + self.assertEqual(403, channel.code, channel.result) + + channel = self._upgrade_room(expire_cache=False) + self.assertEqual(200, channel.code, channel.result) diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index a0788b1bb0..105d418698 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -41,6 +41,7 @@ from twisted.web.resource import Resource from twisted.web.server import Site from synapse.api.constants import Membership +from synapse.api.errors import Codes from synapse.server import HomeServer from synapse.types import JsonDict @@ -135,7 +136,7 @@ class RestHelper: self.site, "POST", path, - json.dumps(content).encode("utf8"), + content, custom_headers=custom_headers, ) @@ -171,6 +172,8 @@ class RestHelper: expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, appservice_user_id: Optional[str] = None, + expect_errcode: Optional[Codes] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: self.change_membership( room=room, @@ -180,6 +183,8 @@ class RestHelper: appservice_user_id=appservice_user_id, membership=Membership.JOIN, expect_code=expect_code, + expect_errcode=expect_errcode, + expect_additional_fields=expect_additional_fields, ) def knock( @@ -205,7 +210,7 @@ class RestHelper: self.site, "POST", path, - json.dumps(data).encode("utf8"), + data, ) assert ( @@ -263,6 +268,7 @@ class RestHelper: appservice_user_id: Optional[str] = None, expect_code: int = HTTPStatus.OK, expect_errcode: Optional[str] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: """ Send a membership state event into a room. @@ -303,7 +309,7 @@ class RestHelper: self.site, "PUT", path, - json.dumps(data).encode("utf8"), + data, ) assert ( @@ -323,6 +329,21 @@ class RestHelper: channel.result["body"], ) + if expect_additional_fields is not None: + for expect_key, expect_value in expect_additional_fields.items(): + assert expect_key in channel.json_body, "Expected field %s, got %s" % ( + expect_key, + channel.json_body, + ) + assert ( + channel.json_body[expect_key] == expect_value + ), "Expected: %s at %s, got: %s, resp: %s" % ( + expect_value, + expect_key, + channel.json_body[expect_key], + channel.json_body, + ) + self.auth_user_id = temp_id def send( @@ -371,7 +392,7 @@ class RestHelper: self.site, "PUT", path, - json.dumps(content or {}).encode("utf8"), + content or {}, custom_headers=custom_headers, ) diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py index ea9e5889bf..1062081a06 100644 --- a/tests/rest/media/v1/test_html_preview.py +++ b/tests/rest/media/v1/test_html_preview.py @@ -370,6 +370,64 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase): og = parse_html_to_open_graph(tree) self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) + def test_twitter_tag(self) -> None: + """Twitter card tags should be used if nothing else is available.""" + html = b""" + <html> + <meta name="twitter:card" content="summary"> + <meta name="twitter:description" content="Description"> + <meta name="twitter:site" content="@matrixdotorg"> + </html> + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Description", + "og:site_name": "@matrixdotorg", + }, + ) + + # But they shouldn't override Open Graph values. + html = b""" + <html> + <meta name="twitter:card" content="summary"> + <meta name="twitter:description" content="Description"> + <meta property="og:description" content="Real Description"> + <meta name="twitter:site" content="@matrixdotorg"> + <meta property="og:site_name" content="matrix.org"> + </html> + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Real Description", + "og:site_name": "matrix.org", + }, + ) + + def test_nested_nodes(self) -> None: + """A body with some nested nodes. Tests that we iterate over children + in the right order (and don't reverse the order of the text).""" + html = b""" + <a href="somewhere">Welcome <b>the bold <u>and underlined text <svg> + with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a> + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text", + }, + ) + class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self) -> None: diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 7204b2dfe0..d18fc13c21 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -23,11 +23,13 @@ from urllib import parse import attr from parameterized import parameterized, parameterized_class from PIL import Image as Image +from typing_extensions import Literal from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.test.proto_helpers import MemoryReactor +from synapse.api.errors import Codes from synapse.events import EventBase from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import make_deferred_yieldable @@ -124,7 +126,9 @@ class _TestImage: expected_scaled: The expected bytes from scaled thumbnailing, or None if test should just check for a valid image returned. expected_found: True if the file should exist on the server, or False if - a 404 is expected. + a 404/400 is expected. + unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or + False if the thumbnailing should succeed or a normal 404 is expected. """ data: bytes @@ -133,6 +137,7 @@ class _TestImage: expected_cropped: Optional[bytes] = None expected_scaled: Optional[bytes] = None expected_found: bool = True + unable_to_thumbnail: bool = False @parameterized_class( @@ -190,6 +195,7 @@ class _TestImage: b"image/gif", b".gif", expected_found=False, + unable_to_thumbnail=True, ), ), ], @@ -364,18 +370,29 @@ class MediaRepoTests(unittest.HomeserverTestCase): def test_thumbnail_crop(self) -> None: """Test that a cropped remote thumbnail is available.""" self._test_thumbnail( - "crop", self.test_image.expected_cropped, self.test_image.expected_found + "crop", + self.test_image.expected_cropped, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) def test_thumbnail_scale(self) -> None: """Test that a scaled remote thumbnail is available.""" self._test_thumbnail( - "scale", self.test_image.expected_scaled, self.test_image.expected_found + "scale", + self.test_image.expected_scaled, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) def test_invalid_type(self) -> None: """An invalid thumbnail type is never available.""" - self._test_thumbnail("invalid", None, False) + self._test_thumbnail( + "invalid", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) @unittest.override_config( {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]} @@ -384,7 +401,12 @@ class MediaRepoTests(unittest.HomeserverTestCase): """ Override the config to generate only scaled thumbnails, but request a cropped one. """ - self._test_thumbnail("crop", None, False) + self._test_thumbnail( + "crop", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) @unittest.override_config( {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]} @@ -393,14 +415,22 @@ class MediaRepoTests(unittest.HomeserverTestCase): """ Override the config to generate only cropped thumbnails, but request a scaled one. """ - self._test_thumbnail("scale", None, False) + self._test_thumbnail( + "scale", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) def test_thumbnail_repeated_thumbnail(self) -> None: """Test that fetching the same thumbnail works, and deleting the on disk thumbnail regenerates it. """ self._test_thumbnail( - "scale", self.test_image.expected_scaled, self.test_image.expected_found + "scale", + self.test_image.expected_scaled, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) if not self.test_image.expected_found: @@ -457,8 +487,24 @@ class MediaRepoTests(unittest.HomeserverTestCase): ) def _test_thumbnail( - self, method: str, expected_body: Optional[bytes], expected_found: bool + self, + method: str, + expected_body: Optional[bytes], + expected_found: bool, + unable_to_thumbnail: bool = False, ) -> None: + """Test the given thumbnailing method works as expected. + + Args: + method: The thumbnailing method to use (crop, scale). + expected_body: The expected bytes from thumbnailing, or None if + test should just check for a valid image. + expected_found: True if the file should exist on the server, or False if + a 404/400 is expected. + unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or + False if the thumbnailing should succeed or a normal 404 is expected. + """ + params = "?width=32&height=32&method=" + method channel = make_request( self.reactor, @@ -481,6 +527,12 @@ class MediaRepoTests(unittest.HomeserverTestCase): if expected_found: self.assertEqual(channel.code, 200) + + self.assertEqual( + channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"), + [b"cross-origin"], + ) + if expected_body is not None: self.assertEqual( channel.result["body"], expected_body, channel.result["body"] @@ -488,6 +540,16 @@ class MediaRepoTests(unittest.HomeserverTestCase): else: # ensure that the result is at least some valid image Image.open(BytesIO(channel.result["body"])) + elif unable_to_thumbnail: + # A 400 with a JSON body. + self.assertEqual(channel.code, 400) + self.assertEqual( + channel.json_body, + { + "errcode": "M_UNKNOWN", + "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)", + }, + ) else: # A 404 with a JSON body. self.assertEqual(channel.code, 404) @@ -549,10 +611,26 @@ class MediaRepoTests(unittest.HomeserverTestCase): [b"noindex, nofollow, noarchive, noimageindex"], ) + def test_cross_origin_resource_policy_header(self) -> None: + """ + Test that the Cross-Origin-Resource-Policy header is set to "cross-origin" + allowing web clients to embed media from the downloads API. + """ + channel = self._req(b"inline; filename=out" + self.test_image.extension) -class TestSpamChecker: + headers = channel.headers + + self.assertEqual( + headers.getRawHeaders(b"Cross-Origin-Resource-Policy"), + [b"cross-origin"], + ) + + +class TestSpamCheckerLegacy: """A spam checker module that rejects all media that includes the bytes `evil`. + + Uses the legacy Spam-Checker API. """ def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None: @@ -593,7 +671,7 @@ class TestSpamChecker: return b"evil" in buf.getvalue() -class SpamCheckerTestCase(unittest.HomeserverTestCase): +class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase): servlets = [ login.register_servlets, admin.register_servlets, @@ -617,7 +695,8 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): { "spam_checker": [ { - "module": TestSpamChecker.__module__ + ".TestSpamChecker", + "module": TestSpamCheckerLegacy.__module__ + + ".TestSpamCheckerLegacy", "config": {}, } ] @@ -642,3 +721,62 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): self.helper.upload_media( self.upload_resource, data, tok=self.tok, expect_code=400 ) + + +EVIL_DATA = b"Some evil data" +EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API" + + +class SpamCheckerTestCase(unittest.HomeserverTestCase): + servlets = [ + login.register_servlets, + admin.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user = self.register_user("user", "pass") + self.tok = self.login("user", "pass") + + # Allow for uploading and downloading to/from the media repo + self.media_repo = hs.get_media_repository_resource() + self.download_resource = self.media_repo.children[b"download"] + self.upload_resource = self.media_repo.children[b"upload"] + + hs.get_module_api().register_spam_checker_callbacks( + check_media_file_for_spam=self.check_media_file_for_spam + ) + + async def check_media_file_for_spam( + self, file_wrapper: ReadableFileWrapper, file_info: FileInfo + ) -> Union[Codes, Literal["NOT_SPAM"]]: + buf = BytesIO() + await file_wrapper.write_chunks_to(buf.write) + + if buf.getvalue() == EVIL_DATA: + return Codes.FORBIDDEN + elif buf.getvalue() == EVIL_DATA_EXPERIMENT: + return (Codes.FORBIDDEN, {}) + else: + return "NOT_SPAM" + + def test_upload_innocent(self) -> None: + """Attempt to upload some innocent data that should be allowed.""" + self.helper.upload_media( + self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200 + ) + + def test_upload_ban(self) -> None: + """Attempt to upload some data that includes bytes "evil", which should + get rejected by the spam checker. + """ + + self.helper.upload_media( + self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400 + ) + + self.helper.upload_media( + self.upload_resource, + EVIL_DATA_EXPERIMENT, + tok=self.tok, + expect_code=400, + ) diff --git a/tests/rest/test_well_known.py b/tests/rest/test_well_known.py index 11f78f52b8..d8faafec75 100644 --- a/tests/rest/test_well_known.py +++ b/tests/rest/test_well_known.py @@ -59,6 +59,28 @@ class WellKnownTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, HTTPStatus.NOT_FOUND) + @unittest.override_config( + { + "public_baseurl": "https://tesths", + "default_identity_server": "https://testis", + "extra_well_known_client_content": {"custom": False}, + } + ) + def test_client_well_known_custom(self) -> None: + channel = self.make_request( + "GET", "/.well-known/matrix/client", shorthand=False + ) + + self.assertEqual(channel.code, HTTPStatus.OK) + self.assertEqual( + channel.json_body, + { + "m.homeserver": {"base_url": "https://tesths/"}, + "m.identity_server": {"base_url": "https://testis"}, + "custom": False, + }, + ) + @unittest.override_config({"serve_server_wellknown": True}) def test_server_well_known(self) -> None: channel = self.make_request( diff --git a/tests/server.py b/tests/server.py index b9f465971f..df3f1564c9 100644 --- a/tests/server.py +++ b/tests/server.py @@ -43,6 +43,7 @@ from twisted.internet.defer import Deferred, fail, maybeDeferred, succeed from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import ( IAddress, + IConsumer, IHostnameResolver, IProtocol, IPullProducer, @@ -53,11 +54,7 @@ from twisted.internet.interfaces import ( ITransport, ) from twisted.python.failure import Failure -from twisted.test.proto_helpers import ( - AccumulatingProtocol, - MemoryReactor, - MemoryReactorClock, -) +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http_headers import Headers from twisted.web.resource import IResource from twisted.web.server import Request, Site @@ -96,6 +93,7 @@ class TimedOutException(Exception): """ +@implementer(IConsumer) @attr.s(auto_attribs=True) class FakeChannel: """ @@ -104,7 +102,7 @@ class FakeChannel: """ site: Union[Site, "FakeSite"] - _reactor: MemoryReactor + _reactor: MemoryReactorClock result: dict = attr.Factory(dict) _ip: str = "127.0.0.1" _producer: Optional[Union[IPullProducer, IPushProducer]] = None @@ -122,7 +120,7 @@ class FakeChannel: self._request = request @property - def json_body(self): + def json_body(self) -> JsonDict: return json.loads(self.text_body) @property @@ -140,7 +138,7 @@ class FakeChannel: return self.result.get("done", False) @property - def code(self): + def code(self) -> int: if not self.result: raise Exception("No result yet.") return int(self.result["code"]) @@ -160,7 +158,7 @@ class FakeChannel: self.result["reason"] = reason self.result["headers"] = headers - def write(self, content): + def write(self, content: bytes) -> None: assert isinstance(content, bytes), "Should be bytes! " + repr(content) if "body" not in self.result: @@ -168,11 +166,16 @@ class FakeChannel: self.result["body"] += content - def registerProducer(self, producer, streaming): + # Type ignore: mypy doesn't like the fact that producer isn't an IProducer. + def registerProducer( # type: ignore[override] + self, + producer: Union[IPullProducer, IPushProducer], + streaming: bool, + ) -> None: self._producer = producer self.producerStreaming = streaming - def _produce(): + def _produce() -> None: if self._producer: self._producer.resumeProducing() self._reactor.callLater(0.1, _produce) @@ -180,31 +183,32 @@ class FakeChannel: if not streaming: self._reactor.callLater(0.0, _produce) - def unregisterProducer(self): + def unregisterProducer(self) -> None: if self._producer is None: return self._producer = None - def requestDone(self, _self): + def requestDone(self, _self: Request) -> None: self.result["done"] = True if isinstance(_self, SynapseRequest): + assert _self.logcontext is not None self.resource_usage = _self.logcontext.get_resource_usage() - def getPeer(self): + def getPeer(self) -> IAddress: # We give an address so that getClientAddress/getClientIP returns a non null entry, # causing us to record the MAU return address.IPv4Address("TCP", self._ip, 3423) - def getHost(self): + def getHost(self) -> IAddress: # this is called by Request.__init__ to configure Request.host. return address.IPv4Address("TCP", "127.0.0.1", 8888) - def isSecure(self): + def isSecure(self) -> bool: return False @property - def transport(self): + def transport(self) -> "FakeChannel": return self def await_result(self, timeout_ms: int = 1000) -> None: @@ -830,7 +834,6 @@ def setup_test_homeserver( # Mock TLS hs.tls_server_context_factory = Mock() - hs.tls_client_options_factory = Mock() hs.setup() if homeserver_to_use == TestHomeServer: diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index 38963ce4a7..46d829b062 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -143,7 +143,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): self.event_id = res["event_id"] # Reset the event cache so the tests start with it empty - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) def test_simple(self): """Test that we cache events that we pull from the DB.""" @@ -160,7 +160,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): """ # Reset the event cache - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) with LoggingContext("test") as ctx: # We keep hold of the event event though we never use it. @@ -170,7 +170,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) # Reset the event cache - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) with LoggingContext("test") as ctx: self.get_success(self.store.get_event(self.event_id)) @@ -345,7 +345,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase): self.event_id = res["event_id"] # Reset the event cache so the tests start with it empty - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) @contextmanager def blocking_get_event_calls( diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py index 9abd0cb446..1edb619630 100644 --- a/tests/storage/databases/main/test_room.py +++ b/tests/storage/databases/main/test_room.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json + +from synapse.api.constants import RoomTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.storage.databases.main.room import _BackgroundUpdates @@ -91,3 +94,69 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): ) ) self.assertEqual(room_creator_after, self.user_id) + + def test_background_add_room_type_column(self): + """Test that the background update to populate the `room_type` column in + `room_stats_state` works properly. + """ + + # Create a room without a type + room_id = self._generate_room() + + # Get event_id of the m.room.create event + event_id = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={ + "room_id": room_id, + "type": "m.room.create", + }, + retcol="event_id", + ) + ) + + # Fake a room creation event with a room type + event = { + "content": { + "creator": "@user:server.org", + "room_version": "9", + "type": RoomTypes.SPACE, + }, + "type": "m.room.create", + } + self.get_success( + self.store.db_pool.simple_update( + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": json.dumps(event)}, + desc="test", + ) + ) + + # Insert and run the background update + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + self.wait_for_background_updates() + + # Make sure the background update filled in the room type + room_type_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="room_stats_state", + keyvalues={"room_id": room_id}, + retcol="room_type", + allow_none=True, + ) + ) + self.assertEqual(room_type_after, RoomTypes.SPACE) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 4273524c4c..ba40124c8a 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -12,161 +12,162 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest.mock import Mock +from twisted.test.proto_helpers import MemoryReactor +from synapse.rest import admin +from synapse.rest.client import login, room +from synapse.server import HomeServer from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.util import Clock from tests.unittest import HomeserverTestCase USER_ID = "@user:example.com" -PlAIN_NOTIF = ["notify", {"set_tweak": "highlight", "value": False}] -HIGHLIGHT = [ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, -] - class EventPushActionsStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main - self.persist_events_store = hs.get_datastores().persist_events + persist_events_store = hs.get_datastores().persist_events + assert persist_events_store is not None + self.persist_events_store = persist_events_store - def test_get_unread_push_actions_for_user_in_range_for_http(self): + def test_get_unread_push_actions_for_user_in_range_for_http(self) -> None: self.get_success( self.store.get_unread_push_actions_for_user_in_range_for_http( USER_ID, 0, 1000, 20 ) ) - def test_get_unread_push_actions_for_user_in_range_for_email(self): + def test_get_unread_push_actions_for_user_in_range_for_email(self) -> None: self.get_success( self.store.get_unread_push_actions_for_user_in_range_for_email( USER_ID, 0, 1000, 20 ) ) - def test_count_aggregation(self): - room_id = "!foo:example.com" - user_id = "@user1235:example.com" + def test_count_aggregation(self) -> None: + # Create a user to receive notifications and send receipts. + user_id = self.register_user("user1235", "pass") + token = self.login("user1235", "pass") + + # And another users to send events. + other_id = self.register_user("other", "pass") + other_token = self.login("other", "pass") - last_read_stream_ordering = [0] + # Create a room and put both users in it. + room_id = self.helper.create_room_as(user_id, tok=token) + self.helper.join(room_id, other_id, tok=other_token) - def _assert_counts(noitf_count, highlight_count): + last_event_id: str + + def _assert_counts( + noitf_count: int, unread_count: int, highlight_count: int + ) -> None: counts = self.get_success( self.store.db_pool.runInteraction( - "", - self.store._get_unread_counts_by_pos_txn, + "get-unread-counts", + self.store._get_unread_counts_by_receipt_txn, room_id, user_id, - last_read_stream_ordering[0], ) ) self.assertEqual( counts, NotifCounts( notify_count=noitf_count, - unread_count=0, # Unread counts are tested in the sync tests. + unread_count=unread_count, highlight_count=highlight_count, ), ) - def _inject_actions(stream, action): - event = Mock() - event.room_id = room_id - event.event_id = "$test:example.com" - event.internal_metadata.stream_ordering = stream - event.internal_metadata.is_outlier.return_value = False - event.depth = stream - - self.get_success( - self.store.add_push_actions_to_staging( - event.event_id, - {user_id: action}, - False, - ) - ) - self.get_success( - self.store.db_pool.runInteraction( - "", - self.persist_events_store._set_push_actions_for_event_and_users_txn, - [(event, None)], - [(event, None)], - ) + def _create_event(highlight: bool = False) -> str: + result = self.helper.send_event( + room_id, + type="m.room.message", + content={"msgtype": "m.text", "body": user_id if highlight else "msg"}, + tok=other_token, ) + nonlocal last_event_id + last_event_id = result["event_id"] + return last_event_id - def _rotate(stream): - self.get_success( - self.store.db_pool.runInteraction( - "", self.store._rotate_notifs_before_txn, stream - ) - ) + def _rotate() -> None: + self.get_success(self.store._rotate_notifs()) - def _mark_read(stream, depth): - last_read_stream_ordering[0] = stream + def _mark_read(event_id: str) -> None: self.get_success( - self.store.db_pool.runInteraction( - "", - self.store._remove_old_push_actions_before_txn, + self.store.insert_receipt( room_id, - user_id, - stream, + "m.read", + user_id=user_id, + event_ids=[event_id], + data={}, ) ) - _assert_counts(0, 0) - _inject_actions(1, PlAIN_NOTIF) - _assert_counts(1, 0) - _rotate(2) - _assert_counts(1, 0) + _assert_counts(0, 0, 0) + _create_event() + _assert_counts(1, 1, 0) + _rotate() + _assert_counts(1, 1, 0) - _inject_actions(3, PlAIN_NOTIF) - _assert_counts(2, 0) - _rotate(4) - _assert_counts(2, 0) + event_id = _create_event() + _assert_counts(2, 2, 0) + _rotate() + _assert_counts(2, 2, 0) - _inject_actions(5, PlAIN_NOTIF) - _mark_read(3, 3) - _assert_counts(1, 0) + _create_event() + _mark_read(event_id) + _assert_counts(1, 1, 0) - _mark_read(5, 5) - _assert_counts(0, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) - _inject_actions(6, PlAIN_NOTIF) - _rotate(7) + _create_event() + _rotate() + _assert_counts(1, 1, 0) - self.get_success( - self.store.db_pool.simple_delete( - table="event_push_actions", keyvalues={"1": 1}, desc="" - ) - ) + # Delete old event push actions, this should not affect the (summarised) count. + self.get_success(self.store._remove_old_push_actions_that_have_rotated()) + _assert_counts(1, 1, 0) - _assert_counts(1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) - _mark_read(7, 7) - _assert_counts(0, 0) - - _inject_actions(8, HIGHLIGHT) - _assert_counts(1, 1) - _rotate(9) - _assert_counts(1, 1) + event_id = _create_event(True) + _assert_counts(1, 1, 1) + _rotate() + _assert_counts(1, 1, 1) # Check that adding another notification and rotating after highlight # works. - _inject_actions(10, PlAIN_NOTIF) - _rotate(11) - _assert_counts(2, 1) + _create_event() + _rotate() + _assert_counts(2, 2, 1) # Check that sending read receipts at different points results in the # right counts. - _mark_read(8, 8) - _assert_counts(1, 0) - _mark_read(10, 10) - _assert_counts(0, 0) - - def test_find_first_stream_ordering_after_ts(self): - def add_event(so, ts): + _mark_read(event_id) + _assert_counts(1, 1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) + + _create_event(True) + _assert_counts(1, 1, 1) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) + _rotate() + _assert_counts(0, 0, 0) + + def test_find_first_stream_ordering_after_ts(self) -> None: + def add_event(so: int, ts: int) -> None: self.get_success( self.store.db_pool.simple_insert( "events", diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py index 8dfaa0559b..9c1182ed16 100644 --- a/tests/storage/test_purge.py +++ b/tests/storage/test_purge.py @@ -115,6 +115,6 @@ class PurgeTests(HomeserverTestCase): ) # The events aren't found. - self.store._invalidate_get_event_cache(create_event.event_id) + self.store._invalidate_local_get_event_cache(create_event.event_id) self.get_failure(self.store.get_event(create_event.event_id), NotFoundError) self.get_failure(self.store.get_event(first["event_id"]), NotFoundError) diff --git a/tests/replication/slave/storage/test_receipts.py b/tests/storage/test_receipts.py index 19f57115a1..b1a8f8bba7 100644 --- a/tests/replication/slave/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -13,23 +13,21 @@ # limitations under the License. from synapse.api.constants import ReceiptTypes -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.types import UserID, create_requester from tests.test_utils.event_injection import create_event - -from ._base import BaseSlavedStoreTestCase +from tests.unittest import HomeserverTestCase OTHER_USER_ID = "@other:test" OUR_USER_ID = "@our:test" -class SlavedReceiptTestCase(BaseSlavedStoreTestCase): - - STORE_TYPE = SlavedReceiptsStore - +class ReceiptTestCase(HomeserverTestCase): def prepare(self, reactor, clock, homeserver): super().prepare(reactor, clock, homeserver) + + self.store = homeserver.get_datastores().main + self.room_creator = homeserver.get_room_creation_handler() self.persist_event_storage_controller = ( self.hs.get_storage_controllers().persistence @@ -87,14 +85,14 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): def test_return_empty_with_no_data(self): res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) self.assertEqual(res, {}) res = self.get_success( - self.master_store.get_receipts_for_user_with_orderings( + self.store.get_receipts_for_user_with_orderings( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], ) @@ -102,7 +100,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): self.assertEqual(res, {}) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], @@ -121,20 +119,20 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Send public read receipt for the first event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} ) ) # Send private read receipt for the second event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) @@ -142,26 +140,24 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the older event when we want only public receipt res = self.get_success( - self.master_store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) ) self.assertEqual(res, {self.room_id1: event1_1_id}) # Test we get the latest event when we want only the public receipt res = self.get_success( - self.master_store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ_PRIVATE] - ) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ_PRIVATE]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) # Test receipt updating self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} ) ) res = self.get_success( - self.master_store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -172,12 +168,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test new room is reflected in what the method returns self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) @@ -194,20 +190,20 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Send public read receipt for the first event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} ) ) # Send private read receipt for the second event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], @@ -217,7 +213,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the older event when we want only public receipt res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] ) ) @@ -225,7 +221,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the latest event when we want only the private receipt res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE] ) ) @@ -233,12 +229,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test receipt updating self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} ) ) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] ) ) @@ -251,12 +247,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test new room is reflected in what the method returns self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id2, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index 3c79dabc9f..3405efb6a8 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.types import RoomAlias, RoomID, UserID @@ -65,71 +64,3 @@ class RoomStoreTestCase(HomeserverTestCase): self.assertIsNone( (self.get_success(self.store.get_room_with_stats("!uknown:test"))), ) - - -class RoomEventsStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): - # Room events need the full datastore, for persist_event() and - # get_room_state() - self.store = hs.get_datastores().main - self._storage_controllers = hs.get_storage_controllers() - self.event_factory = hs.get_event_factory() - - self.room = RoomID.from_string("!abcde:test") - - self.get_success( - self.store.store_room( - self.room.to_string(), - room_creator_user_id="@creator:text", - is_public=True, - room_version=RoomVersions.V1, - ) - ) - - def inject_room_event(self, **kwargs): - self.get_success( - self._storage_controllers.persistence.persist_event( - self.event_factory.create_event(room_id=self.room.to_string(), **kwargs) - ) - ) - - def STALE_test_room_name(self): - name = "A-Room-Name" - - self.inject_room_event( - etype=EventTypes.Name, name=name, content={"name": name}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.name", "room_id": self.room.to_string(), "name": name}, - state[0], - ) - - def STALE_test_room_topic(self): - topic = "A place for things" - - self.inject_room_event( - etype=EventTypes.Topic, topic=topic, content={"topic": topic}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.topic", "room_id": self.room.to_string(), "topic": topic}, - state[0], - ) - - # Not testing the various 'level' methods for now because there's lots - # of them and need coalescing; see JIRA SPEC-11 diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 1218786d79..240b02cb9f 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -23,7 +23,6 @@ from synapse.util import Clock from tests import unittest from tests.server import TestHomeServer -from tests.test_utils import event_injection class RoomMemberStoreTestCase(unittest.HomeserverTestCase): @@ -110,60 +109,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): # It now knows about Charlie's server. self.assertEqual(self.store._known_servers_count, 2) - def test_get_joined_users_from_context(self) -> None: - room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) - bob_event = self.get_success( - event_injection.inject_member_event( - self.hs, room, self.u_bob, Membership.JOIN - ) - ) - - # first, create a regular event - event, context = self.get_success( - event_injection.create_event( - self.hs, - room_id=room, - sender=self.u_alice, - prev_event_ids=[bob_event.event_id], - type="m.test.1", - content={}, - ) - ) - - users = self.get_success( - self.store.get_joined_users_from_context(event, context) - ) - self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) - - # Regression test for #7376: create a state event whose key matches bob's - # user_id, but which is *not* a membership event, and persist that; then check - # that `get_joined_users_from_context` returns the correct users for the next event. - non_member_event = self.get_success( - event_injection.inject_event( - self.hs, - room_id=room, - sender=self.u_bob, - prev_event_ids=[bob_event.event_id], - type="m.test.2", - state_key=self.u_bob, - content={}, - ) - ) - event, context = self.get_success( - event_injection.create_event( - self.hs, - room_id=room, - sender=self.u_alice, - prev_event_ids=[non_member_event.event_id], - type="m.test.3", - content={}, - ) - ) - users = self.get_success( - self.store.get_joined_users_from_context(event, context) - ) - self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) - def test__null_byte_in_display_name_properly_handled(self) -> None: room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8043bdbde2..5564161750 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -369,8 +369,8 @@ class StateStoreTestCase(HomeserverTestCase): state_dict_ids = cache_entry.value self.assertEqual(cache_entry.full, False) - self.assertEqual(cache_entry.known_absent, {(e1.type, e1.state_key)}) - self.assertDictEqual(state_dict_ids, {(e1.type, e1.state_key): e1.event_id}) + self.assertEqual(cache_entry.known_absent, set()) + self.assertDictEqual(state_dict_ids, {}) ############################################ # test that things work with a partial cache @@ -387,7 +387,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) room_id = self.room.to_string() (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( @@ -412,7 +412,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, @@ -443,7 +443,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index 229ecd84a6..e42d7b9ba0 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2018-2022 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,50 @@ # limitations under the License. import unittest -from typing import Optional +from typing import Collection, Dict, Iterable, List, Optional from parameterized import parameterized from synapse import event_auth from synapse.api.constants import EventContentFields -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions from synapse.events import EventBase, make_event_from_dict +from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, get_domain_from_id +from tests.test_utils import get_awaitable_result + + +class _StubEventSourceStore: + """A stub implementation of the EventSourceStore""" + + def __init__(self): + self._store: Dict[str, EventBase] = {} + + def add_event(self, event: EventBase): + self._store[event.event_id] = event + + def add_events(self, events: Iterable[EventBase]): + for event in events: + self._store[event.event_id] = event + + async def get_events( + self, + event_ids: Collection[str], + redact_behaviour: EventRedactBehaviour, + get_prev_content: bool = False, + allow_rejected: bool = False, + ) -> Dict[str, EventBase]: + assert allow_rejected + assert not get_prev_content + assert redact_behaviour == EventRedactBehaviour.as_is + results = {} + for e in event_ids: + if e in self._store: + results[e] = self._store[e] + return results + class EventAuthTestCase(unittest.TestCase): def test_rejected_auth_events(self): @@ -36,11 +69,15 @@ class EventAuthTestCase(unittest.TestCase): _join_event(RoomVersions.V9, creator), ] + event_store = _StubEventSourceStore() + event_store.add_events(auth_events) + # creator should be able to send state - event_auth.check_auth_rules_for_event( - _random_state_event(RoomVersions.V9, creator), - auth_events, + event = _random_state_event(RoomVersions.V9, creator, auth_events) + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, event) ) + event_auth.check_state_dependent_auth_rules(event, auth_events) # ... but a rejected join_rules event should cause it to be rejected rejected_join_rules = _join_rules_event( @@ -50,23 +87,154 @@ class EventAuthTestCase(unittest.TestCase): ) rejected_join_rules.rejected_reason = "stinky" auth_events.append(rejected_join_rules) + event_store.add_event(rejected_join_rules) - self.assertRaises( - AuthError, - event_auth.check_auth_rules_for_event, - _random_state_event(RoomVersions.V9, creator), - auth_events, - ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules( + event_store, + _random_state_event(RoomVersions.V9, creator), + ) + ) # ... even if there is *also* a good join rules auth_events.append(_join_rules_event(RoomVersions.V9, creator, "public")) + event_store.add_event(rejected_join_rules) - self.assertRaises( - AuthError, - event_auth.check_auth_rules_for_event, - _random_state_event(RoomVersions.V9, creator), - auth_events, + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules( + event_store, + _random_state_event(RoomVersions.V9, creator), + ) + ) + + def test_create_event_with_prev_events(self): + """A create event with prev_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 1: If type is m.room.create: + 1. If it has any previous events, reject. + """ + creator = f"@creator:{TEST_DOMAIN}" + + # we make both a good event and a bad event, to check that we are rejecting + # the bad event for the reason we think we are. + good_event = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + "type": "m.room.create", + "state_key": "", + "sender": creator, + "content": { + "creator": creator, + "room_version": RoomVersions.V9.identifier, + }, + "auth_events": [], + "prev_events": [], + }, + room_version=RoomVersions.V9, ) + bad_event = make_event_from_dict( + {**good_event.get_dict(), "prev_events": ["$fakeevent"]}, + room_version=RoomVersions.V9, + ) + + event_store = _StubEventSourceStore() + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) + + def test_duplicate_auth_events(self): + """Events with duplicate auth_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 2. Reject if event has auth_events that: + 1. have duplicate entries for a given type and state_key pair + """ + creator = "@creator:example.com" + + create_event = _create_event(RoomVersions.V9, creator) + join_event1 = _join_event(RoomVersions.V9, creator) + pl_event = _power_levels_event( + RoomVersions.V9, + creator, + {"state_default": 30, "users": {"creator": 100}}, + ) + + # create a second join event, so that we can make a duplicate + join_event2 = _join_event(RoomVersions.V9, creator) + + event_store = _StubEventSourceStore() + event_store.add_events([create_event, join_event1, join_event2, pl_event]) + + good_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event2, pl_event] + ) + bad_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event1, join_event2, pl_event] + ) + # a variation: two instances of the *same* event + bad_event2 = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event2, join_event2, pl_event] + ) + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event2) + ) + + def test_unexpected_auth_events(self): + """Events with excess auth_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 2. Reject if event has auth_events that: + 2. have entries whose type and state_key don’t match those specified by the + auth events selection algorithm described in the server specification. + """ + creator = "@creator:example.com" + + create_event = _create_event(RoomVersions.V9, creator) + join_event = _join_event(RoomVersions.V9, creator) + pl_event = _power_levels_event( + RoomVersions.V9, + creator, + {"state_default": 30, "users": {"creator": 100}}, + ) + join_rules_event = _join_rules_event(RoomVersions.V9, creator, "public") + + event_store = _StubEventSourceStore() + event_store.add_events([create_event, join_event, pl_event, join_rules_event]) + + good_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event, pl_event] + ) + # join rules should *not* be included in the auth events. + bad_event = _random_state_event( + RoomVersions.V9, + creator, + [create_event, join_event, pl_event, join_rules_event], + ) + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) def test_random_users_cannot_send_state_before_first_pl(self): """ @@ -82,7 +250,7 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send state - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _random_state_event(RoomVersions.V1, creator), auth_events, ) @@ -90,7 +258,7 @@ class EventAuthTestCase(unittest.TestCase): # joiner should not be able to send state self.assertRaises( AuthError, - event_auth.check_auth_rules_for_event, + event_auth.check_state_dependent_auth_rules, _random_state_event(RoomVersions.V1, joiner), auth_events, ) @@ -119,13 +287,13 @@ class EventAuthTestCase(unittest.TestCase): # pleb should not be able to send state self.assertRaises( AuthError, - event_auth.check_auth_rules_for_event, + event_auth.check_state_dependent_auth_rules, _random_state_event(RoomVersions.V1, pleb), auth_events, ), # king should be able to send state - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _random_state_event(RoomVersions.V1, king), auth_events, ) @@ -140,27 +308,27 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send aliases - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator), auth_events, ) # Reject an event with no state key. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator, state_key=""), auth_events, ) # If the domain of the sender does not match the state key, reject. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator, state_key="test.com"), auth_events, ) # Note that the member does *not* need to be in the room. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, other), auth_events, ) @@ -175,24 +343,24 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send aliases - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator), auth_events, ) # No particular checks are done on the state key. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator, state_key=""), auth_events, ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator, state_key="test.com"), auth_events, ) # Per standard auth rules, the member must be in the room. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, other), auth_events, ) @@ -220,12 +388,12 @@ class EventAuthTestCase(unittest.TestCase): # on room V1, pleb should be able to modify the notifications power level. if allow_modification: - event_auth.check_auth_rules_for_event(pl_event, auth_events) + event_auth.check_state_dependent_auth_rules(pl_event, auth_events) else: # But an MSC2209 room rejects this change. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event(pl_event, auth_events) + event_auth.check_state_dependent_auth_rules(pl_event, auth_events) def test_join_rules_public(self): """ @@ -243,14 +411,14 @@ class EventAuthTestCase(unittest.TestCase): } # Check join. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) # A user cannot be force-joined to a room. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event(RoomVersions.V6, pleb, "join", sender=creator), auth_events.values(), ) @@ -260,7 +428,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -269,7 +437,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "leave" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -278,7 +446,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -287,7 +455,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -309,14 +477,14 @@ class EventAuthTestCase(unittest.TestCase): # A join without an invite is rejected. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) # A user cannot be force-joined to a room. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event(RoomVersions.V6, pleb, "join", sender=creator), auth_events.values(), ) @@ -326,7 +494,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -336,7 +504,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "leave" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -345,7 +513,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -354,7 +522,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -376,7 +544,7 @@ class EventAuthTestCase(unittest.TestCase): } with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -413,7 +581,7 @@ class EventAuthTestCase(unittest.TestCase): EventContentFields.AUTHORISING_USER: "@creator:example.com" }, ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -429,7 +597,7 @@ class EventAuthTestCase(unittest.TestCase): pl_auth_events[("m.room.member", "@inviter:foo.test")] = _join_event( RoomVersions.V8, "@inviter:foo.test" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event( RoomVersions.V8, pleb, @@ -442,7 +610,7 @@ class EventAuthTestCase(unittest.TestCase): # A join which is missing an authorised server is rejected. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) @@ -455,7 +623,7 @@ class EventAuthTestCase(unittest.TestCase): {"invite": 100, "users": {"@other:example.com": 150}}, ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event( RoomVersions.V8, pleb, @@ -469,7 +637,7 @@ class EventAuthTestCase(unittest.TestCase): # A user cannot be force-joined to a room. (This uses an event which # *would* be valid, but is sent be a different user.) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event( RoomVersions.V8, pleb, @@ -487,7 +655,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V8, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -496,7 +664,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "leave" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -506,7 +674,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) @@ -516,15 +684,54 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) + def test_room_v10_rejects_string_power_levels(self) -> None: + pl_event_content = {"users_default": "42"} + pl_event = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10), + "type": "m.room.power_levels", + "sender": "@test:test.com", + "state_key": "", + "content": pl_event_content, + "signatures": {"test.com": {"ed25519:0": "some9signature"}}, + }, + room_version=RoomVersions.V10, + ) + + pl_event2_content = {"events": {"m.room.name": "42", "m.room.power_levels": 42}} + pl_event2 = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10), + "type": "m.room.power_levels", + "sender": "@test:test.com", + "state_key": "", + "content": pl_event2_content, + "signatures": {"test.com": {"ed25519:0": "some9signature"}}, + }, + room_version=RoomVersions.V10, + ) -# helpers for making events + with self.assertRaises(SynapseError): + event_auth._check_power_levels( + pl_event.room_version, pl_event, {("fake_type", "fake_key"): pl_event2} + ) + + with self.assertRaises(SynapseError): + event_auth._check_power_levels( + pl_event.room_version, pl_event2, {("fake_type", "fake_key"): pl_event} + ) -TEST_ROOM_ID = "!test:room" + +# helpers for making events +TEST_DOMAIN = "example.com" +TEST_ROOM_ID = f"!test_room:{TEST_DOMAIN}" def _create_event( @@ -539,6 +746,7 @@ def _create_event( "state_key": "", "sender": user_id, "content": {"creator": user_id}, + "auth_events": [], }, room_version=room_version, ) @@ -559,6 +767,7 @@ def _member_event( "sender": sender or user_id, "state_key": user_id, "content": {"membership": membership, **(additional_content or {})}, + "auth_events": [], "prev_events": [], }, room_version=room_version, @@ -609,7 +818,22 @@ def _alias_event(room_version: RoomVersion, sender: str, **kwargs) -> EventBase: return make_event_from_dict(data, room_version=room_version) -def _random_state_event(room_version: RoomVersion, sender: str) -> EventBase: +def _build_auth_dict_for_room_version( + room_version: RoomVersion, auth_events: Iterable[EventBase] +) -> List: + if room_version.event_format == EventFormatVersions.V1: + return [(e.event_id, "not_used") for e in auth_events] + else: + return [e.event_id for e in auth_events] + + +def _random_state_event( + room_version: RoomVersion, + sender: str, + auth_events: Optional[Iterable[EventBase]] = None, +) -> EventBase: + if auth_events is None: + auth_events = [] return make_event_from_dict( { "room_id": TEST_ROOM_ID, @@ -618,6 +842,7 @@ def _random_state_event(room_version: RoomVersion, sender: str) -> EventBase: "sender": sender, "state_key": "", "content": {"membership": "join"}, + "auth_events": _build_auth_dict_for_room_version(room_version, auth_events), }, room_version=room_version, ) diff --git a/tests/test_federation.py b/tests/test_federation.py index 0cbef70bfa..779fad1f63 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -81,12 +81,8 @@ class MessageAcceptTests(unittest.HomeserverTestCase): self.handler = self.homeserver.get_federation_handler() federation_event_handler = self.homeserver.get_federation_event_handler() - async def _check_event_auth( - origin, - event, - context, - ): - return context + async def _check_event_auth(origin, event, context): + pass federation_event_handler._check_event_auth = _check_event_auth self.client = self.homeserver.get_federation_client() diff --git a/tests/test_server.py b/tests/test_server.py index 847432f791..2fe4411401 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -14,7 +14,7 @@ import re from http import HTTPStatus -from typing import Tuple +from typing import Awaitable, Callable, Dict, NoReturn, Optional, Tuple from twisted.internet.defer import Deferred from twisted.web.resource import Resource @@ -36,6 +36,7 @@ from synapse.util import Clock from tests import unittest from tests.http.server._base import test_disconnect from tests.server import ( + FakeChannel, FakeSite, ThreadedMemoryReactorClock, make_request, @@ -44,7 +45,7 @@ from tests.server import ( class JsonResourceTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.hs_clock = Clock(self.reactor) self.homeserver = setup_test_homeserver( @@ -54,7 +55,7 @@ class JsonResourceTests(unittest.TestCase): reactor=self.reactor, ) - def test_handler_for_request(self): + def test_handler_for_request(self) -> None: """ JsonResource.handler_for_request gives correctly decoded URL args to the callback, while Twisted will give the raw bytes of URL query @@ -62,7 +63,9 @@ class JsonResourceTests(unittest.TestCase): """ got_kwargs = {} - def _callback(request, **kwargs): + def _callback( + request: SynapseRequest, **kwargs: object + ) -> Tuple[int, Dict[str, object]]: got_kwargs.update(kwargs) return 200, kwargs @@ -83,13 +86,13 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(got_kwargs, {"room_id": "\N{SNOWMAN}"}) - def test_callback_direct_exception(self): + def test_callback_direct_exception(self) -> None: """ If the web callback raises an uncaught exception, it will be translated into a 500. """ - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> NoReturn: raise Exception("boo") res = JsonResource(self.homeserver) @@ -103,17 +106,17 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.result["code"], b"500") - def test_callback_indirect_exception(self): + def test_callback_indirect_exception(self) -> None: """ If the web callback raises an uncaught exception in a Deferred, it will be translated into a 500. """ - def _throw(*args): + def _throw(*args: object) -> NoReturn: raise Exception("boo") - def _callback(request, **kwargs): - d = Deferred() + def _callback(request: SynapseRequest, **kwargs: object) -> "Deferred[None]": + d: "Deferred[None]" = Deferred() d.addCallback(_throw) self.reactor.callLater(0.5, d.callback, True) return make_deferred_yieldable(d) @@ -129,13 +132,13 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.result["code"], b"500") - def test_callback_synapseerror(self): + def test_callback_synapseerror(self) -> None: """ If the web callback raises a SynapseError, it returns the appropriate status code and message set in it. """ - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> NoReturn: raise SynapseError(403, "Forbidden!!one!", Codes.FORBIDDEN) res = JsonResource(self.homeserver) @@ -151,12 +154,12 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.json_body["error"], "Forbidden!!one!") self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") - def test_no_handler(self): + def test_no_handler(self) -> None: """ If there is no handler to process the request, Synapse will return 400. """ - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> None: """ Not ever actually called! """ @@ -175,14 +178,16 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.json_body["error"], "Unrecognized request") self.assertEqual(channel.json_body["errcode"], "M_UNRECOGNIZED") - def test_head_request(self): + def test_head_request(self) -> None: """ JsonResource.handler_for_request gives correctly decoded URL args to the callback, while Twisted will give the raw bytes of URL query arguments. """ - def _callback(request, **kwargs): + def _callback( + request: SynapseRequest, **kwargs: object + ) -> Tuple[int, Dict[str, object]]: return 200, {"result": True} res = JsonResource(self.homeserver) @@ -203,20 +208,21 @@ class JsonResourceTests(unittest.TestCase): class OptionsResourceTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() class DummyResource(Resource): isLeaf = True - def render(self, request): - return request.path + def render(self, request: SynapseRequest) -> bytes: + # Type-ignore: mypy thinks request.path is Optional[Any], not bytes. + return request.path # type: ignore[return-value] # Setup a resource with some children. self.resource = OptionsResource() self.resource.putChild(b"res", DummyResource()) - def _make_request(self, method, path): + def _make_request(self, method: bytes, path: bytes) -> FakeChannel: """Create a request from the method/path and return a channel with the response.""" # Create a site and query for the resource. site = SynapseSite( @@ -225,7 +231,7 @@ class OptionsResourceTests(unittest.TestCase): parse_listener_def({"type": "http", "port": 0}), self.resource, "1.0", - max_request_body_size=1234, + max_request_body_size=4096, reactor=self.reactor, ) @@ -233,7 +239,7 @@ class OptionsResourceTests(unittest.TestCase): channel = make_request(self.reactor, site, method, path, shorthand=False) return channel - def test_unknown_options_request(self): + def test_unknown_options_request(self) -> None: """An OPTIONS requests to an unknown URL still returns 204 No Content.""" channel = self._make_request(b"OPTIONS", b"/foo/") self.assertEqual(channel.result["code"], b"204") @@ -253,7 +259,7 @@ class OptionsResourceTests(unittest.TestCase): "has CORS Headers header", ) - def test_known_options_request(self): + def test_known_options_request(self) -> None: """An OPTIONS requests to an known URL still returns 204 No Content.""" channel = self._make_request(b"OPTIONS", b"/res/") self.assertEqual(channel.result["code"], b"204") @@ -273,12 +279,12 @@ class OptionsResourceTests(unittest.TestCase): "has CORS Headers header", ) - def test_unknown_request(self): + def test_unknown_request(self) -> None: """A non-OPTIONS request to an unknown URL should 404.""" channel = self._make_request(b"GET", b"/foo/") self.assertEqual(channel.result["code"], b"404") - def test_known_request(self): + def test_known_request(self) -> None: """A non-OPTIONS request to an known URL should query the proper resource.""" channel = self._make_request(b"GET", b"/res/") self.assertEqual(channel.result["code"], b"200") @@ -287,16 +293,17 @@ class OptionsResourceTests(unittest.TestCase): class WrapHtmlRequestHandlerTests(unittest.TestCase): class TestResource(DirectServeHtmlResource): - callback = None + callback: Optional[Callable[..., Awaitable[None]]] - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: SynapseRequest) -> None: + assert self.callback is not None await self.callback(request) - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() - def test_good_response(self): - async def callback(request): + def test_good_response(self) -> None: + async def callback(request: SynapseRequest) -> None: request.write(b"response") request.finish() @@ -311,13 +318,13 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): body = channel.result["body"] self.assertEqual(body, b"response") - def test_redirect_exception(self): + def test_redirect_exception(self) -> None: """ If the callback raises a RedirectException, it is turned into a 30x with the right location. """ - async def callback(request, **kwargs): + async def callback(request: SynapseRequest, **kwargs: object) -> None: raise RedirectException(b"/look/an/eagle", 301) res = WrapHtmlRequestHandlerTests.TestResource() @@ -332,13 +339,13 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): location_headers = [v for k, v in headers if k == b"Location"] self.assertEqual(location_headers, [b"/look/an/eagle"]) - def test_redirect_exception_with_cookie(self): + def test_redirect_exception_with_cookie(self) -> None: """ If the callback raises a RedirectException which sets a cookie, that is returned too """ - async def callback(request, **kwargs): + async def callback(request: SynapseRequest, **kwargs: object) -> NoReturn: e = RedirectException(b"/no/over/there", 304) e.cookies.append(b"session=yespls") raise e @@ -357,10 +364,10 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): cookies_headers = [v for k, v in headers if k == b"Set-Cookie"] self.assertEqual(cookies_headers, [b"session=yespls"]) - def test_head_request(self): + def test_head_request(self) -> None: """A head request should work by being turned into a GET request.""" - async def callback(request): + async def callback(request: SynapseRequest) -> None: request.write(b"response") request.finish() @@ -410,7 +417,7 @@ class CancellableDirectServeHtmlResource(DirectServeHtmlResource): class DirectServeJsonResourceCancellationTests(unittest.TestCase): """Tests for `DirectServeJsonResource` cancellation.""" - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.clock = Clock(self.reactor) self.resource = CancellableDirectServeJsonResource(self.clock) @@ -444,7 +451,7 @@ class DirectServeJsonResourceCancellationTests(unittest.TestCase): class DirectServeHtmlResourceCancellationTests(unittest.TestCase): """Tests for `DirectServeHtmlResource` cancellation.""" - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.clock = Clock(self.reactor) self.resource = CancellableDirectServeHtmlResource(self.clock) diff --git a/tests/test_state.py b/tests/test_state.py index b005dd8d0f..bafd6d1750 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -21,7 +21,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.state import StateHandler, StateResolutionHandler +from synapse.state import StateHandler, StateResolutionHandler, _make_state_cache_entry from synapse.util import Clock from synapse.util.macaroons import MacaroonGenerator @@ -99,6 +99,10 @@ class _DummyStore: state_group = self._next_group self._next_group += 1 + if current_state_ids is None: + current_state_ids = dict(self._group_to_state[prev_group]) + current_state_ids.update(delta_ids) + self._group_to_state[state_group] = dict(current_state_ids) return state_group @@ -131,7 +135,9 @@ class _DummyStore: async def get_room_version_id(self, room_id): return RoomVersions.V1.identifier - async def get_state_group_for_events(self, event_ids): + async def get_state_group_for_events( + self, event_ids, await_full_state: bool = True + ): res = {} for event in event_ids: res[event] = self._event_to_state_group[event] @@ -193,6 +199,8 @@ class StateTestCase(unittest.TestCase): "get_state_resolution_handler", "get_account_validity_handler", "get_macaroon_generator", + "get_instance_name", + "get_simple_http_client", "hostname", ] ) @@ -756,3 +764,43 @@ class StateTestCase(unittest.TestCase): result = yield defer.ensureDeferred(self.state.compute_event_context(event)) return result + + def test_make_state_cache_entry(self): + "Test that calculating a prev_group and delta is correct" + + new_state = { + ("a", ""): "E", + ("b", ""): "E", + ("c", ""): "E", + ("d", ""): "E", + } + + # old_state_1 has fewer differences to new_state than old_state_2, but + # the delta involves deleting a key, which isn't allowed in the deltas, + # so we should pick old_state_2 as the prev_group. + + # `old_state_1` has two differences: `a` and `e` + old_state_1 = { + ("a", ""): "F", + ("b", ""): "E", + ("c", ""): "E", + ("d", ""): "E", + ("e", ""): "E", + } + + # `old_state_2` has three differences: `a`, `c` and `d` + old_state_2 = { + ("a", ""): "F", + ("b", ""): "E", + ("c", ""): "F", + ("d", ""): "F", + } + + entry = _make_state_cache_entry(new_state, {1: old_state_1, 2: old_state_2}) + + self.assertEqual(entry.prev_group, 2) + + # There are three changes from `old_state_2` to `new_state` + self.assertEqual( + entry.delta_ids, {("a", ""): "E", ("c", ""): "E", ("d", ""): "E"} + ) diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 37fada5c53..d3c13cf14c 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactorClock @@ -51,7 +50,7 @@ class TermsTestCase(unittest.HomeserverTestCase): def test_ui_auth(self): # Do a UI auth request - request_data = json.dumps({"username": "kermit", "password": "monkey"}) + request_data = {"username": "kermit", "password": "monkey"} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"401", channel.result) @@ -82,16 +81,14 @@ class TermsTestCase(unittest.HomeserverTestCase): self.assertDictContainsSubset(channel.json_body["params"], expected_params) # We have to complete the dummy auth stage before completing the terms stage - request_data = json.dumps( - { - "username": "kermit", - "password": "monkey", - "auth": { - "session": channel.json_body["session"], - "type": "m.login.dummy", - }, - } - ) + request_data = { + "username": "kermit", + "password": "monkey", + "auth": { + "session": channel.json_body["session"], + "type": "m.login.dummy", + }, + } self.registration_handler.check_username = Mock(return_value=True) @@ -102,16 +99,14 @@ class TermsTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"401", channel.result) # Finish the UI auth for terms - request_data = json.dumps( - { - "username": "kermit", - "password": "monkey", - "auth": { - "session": channel.json_body["session"], - "type": "m.login.terms", - }, - } - ) + request_data = { + "username": "kermit", + "password": "monkey", + "auth": { + "session": channel.json_body["session"], + "type": "m.login.terms", + }, + } channel = self.make_request(b"POST", self.url, request_data) # We're interested in getting a response that looks like a successful diff --git a/tests/test_visibility.py b/tests/test_visibility.py index f338af6c36..c385b2f8d4 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -272,7 +272,7 @@ class FilterEventsForClientTestCase(unittest.FederatingHomeserverTestCase): "state_key": "@user:test", "content": {"membership": "invite"}, } - self.add_hashes_and_signatures(invite_pdu) + self.add_hashes_and_signatures_from_other_server(invite_pdu) invite_event_id = make_event_from_dict(invite_pdu, RoomVersions.V9).event_id self.get_success( diff --git a/tests/unittest.py b/tests/unittest.py index c645dd3563..66ce92f4a6 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -16,7 +16,6 @@ import gc import hashlib import hmac -import json import logging import secrets import time @@ -285,7 +284,7 @@ class HomeserverTestCase(TestCase): config=self.hs.config.server.listeners[0], resource=self.resource, server_version_string="1", - max_request_body_size=1234, + max_request_body_size=4096, reactor=self.reactor, ) @@ -619,20 +618,16 @@ class HomeserverTestCase(TestCase): want_mac.update(nonce.encode("ascii") + b"\x00" + nonce_str) want_mac_digest = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": username, - "displayname": displayname, - "password": password, - "admin": admin, - "mac": want_mac_digest, - "inhibit_login": True, - } - ) - channel = self.make_request( - "POST", "/_synapse/admin/v1/register", body.encode("utf8") - ) + body = { + "nonce": nonce, + "username": username, + "displayname": displayname, + "password": password, + "admin": admin, + "mac": want_mac_digest, + "inhibit_login": True, + } + channel = self.make_request("POST", "/_synapse/admin/v1/register", body) self.assertEqual(channel.code, 200, channel.json_body) user_id = channel.json_body["user_id"] @@ -676,9 +671,7 @@ class HomeserverTestCase(TestCase): custom_headers: Optional[Iterable[CustomHeaderType]] = None, ) -> str: """ - Log in a user, and get an access token. Requires the Login API be - registered. - + Log in a user, and get an access token. Requires the Login API be registered. """ body = {"type": "m.login.password", "user": username, "password": password} if device_id: @@ -687,7 +680,7 @@ class HomeserverTestCase(TestCase): channel = self.make_request( "POST", "/_matrix/client/r0/login", - json.dumps(body).encode("utf8"), + body, custom_headers=custom_headers, ) self.assertEqual(channel.code, 200, channel.result) @@ -780,7 +773,7 @@ class FederatingHomeserverTestCase(HomeserverTestCase): verify_key_id, FetchKeyResult( verify_key=verify_key, - valid_until_ts=clock.time_msec() + 1000, + valid_until_ts=clock.time_msec() + 10000, ), ) ], @@ -838,7 +831,7 @@ class FederatingHomeserverTestCase(HomeserverTestCase): client_ip=client_ip, ) - def add_hashes_and_signatures( + def add_hashes_and_signatures_from_other_server( self, event_dict: JsonDict, room_version: RoomVersion = KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index bee66dee43..e8b6246ab5 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -20,7 +20,7 @@ from tests import unittest class DictCacheTestCase(unittest.TestCase): def setUp(self): - self.cache = DictionaryCache("foobar") + self.cache = DictionaryCache("foobar", max_entries=10) def test_simple_cache_hit_full(self): key = "test_simple_cache_hit_full" @@ -76,13 +76,13 @@ class DictCacheTestCase(unittest.TestCase): seq = self.cache.sequence test_value_1 = {"test": "test_simple_cache_hit_miss_partial"} - self.cache.update(seq, key, test_value_1, fetched_keys=set("test")) + self.cache.update(seq, key, test_value_1, fetched_keys={"test"}) seq = self.cache.sequence test_value_2 = {"test2": "test_simple_cache_hit_miss_partial2"} - self.cache.update(seq, key, test_value_2, fetched_keys=set("test2")) + self.cache.update(seq, key, test_value_2, fetched_keys={"test2"}) - c = self.cache.get(key) + c = self.cache.get(key, dict_keys=["test", "test2"]) self.assertEqual( { "test": "test_simple_cache_hit_miss_partial", @@ -90,3 +90,30 @@ class DictCacheTestCase(unittest.TestCase): }, c.value, ) + self.assertEqual(c.full, False) + + def test_invalidation(self): + """Test that the partial dict and full dicts get invalidated + separately. + """ + key = "some_key" + + seq = self.cache.sequence + # start by populating a "full dict" entry + self.cache.update(seq, key, {"a": "b", "c": "d"}) + + # add a bunch of individual entries, also keeping the individual + # entry for "a" warm. + for i in range(20): + self.cache.get(key, ["a"]) + self.cache.update(seq, f"key{i}", {1: 2}) + + # We should have evicted the full dict... + r = self.cache.get(key) + self.assertFalse(r.full) + self.assertTrue("c" not in r.value) + + # ... but kept the "a" entry that we kept querying. + r = self.cache.get(key, dict_keys=["a"]) + self.assertFalse(r.full) + self.assertEqual(r.value, {"a": "b"}) diff --git a/tests/utils.py b/tests/utils.py index 3059c453d5..d2c6d1e852 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -15,12 +15,17 @@ import atexit import os +from typing import Any, Callable, Dict, List, Tuple, Union, overload + +import attr +from typing_extensions import Literal, ParamSpec from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.config.server import DEFAULT_ROOM_VERSION from synapse.logging.context import current_context, set_current_context +from synapse.server import HomeServer from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database @@ -50,12 +55,11 @@ SQLITE_PERSIST_DB = os.environ.get("SYNAPSE_TEST_PERSIST_SQLITE_DB") is not None POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres" -def setupdb(): +def setupdb() -> None: # If we're using PostgreSQL, set up the db once if USE_POSTGRES_FOR_TESTS: # create a PostgresEngine db_engine = create_engine({"name": "psycopg2", "args": {}}) - # connect to postgres to create the base database. db_conn = db_engine.module.connect( user=POSTGRES_USER, @@ -64,7 +68,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.execute( @@ -82,11 +86,11 @@ def setupdb(): port=POSTGRES_PORT, password=POSTGRES_PASSWORD, ) - db_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") - prepare_database(db_conn, db_engine, None) - db_conn.close() + logging_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") + prepare_database(logging_conn, db_engine, None) + logging_conn.close() - def _cleanup(): + def _cleanup() -> None: db_conn = db_engine.module.connect( user=POSTGRES_USER, host=POSTGRES_HOST, @@ -94,7 +98,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.close() @@ -103,7 +107,19 @@ def setupdb(): atexit.register(_cleanup) -def default_config(name, parse=False): +@overload +def default_config(name: str, parse: Literal[False] = ...) -> Dict[str, object]: + ... + + +@overload +def default_config(name: str, parse: Literal[True]) -> HomeServerConfig: + ... + + +def default_config( + name: str, parse: bool = False +) -> Union[Dict[str, object], HomeServerConfig]: """ Create a reasonable test config. """ @@ -151,6 +167,7 @@ def default_config(name, parse=False): "local": {"per_second": 10000, "burst_count": 10000}, "remote": {"per_second": 10000, "burst_count": 10000}, }, + "rc_joins_per_room": {"per_second": 10000, "burst_count": 10000}, "rc_invites": { "per_room": {"per_second": 10000, "burst_count": 10000}, "per_user": {"per_second": 10000, "burst_count": 10000}, @@ -169,7 +186,7 @@ def default_config(name, parse=False): # disable user directory updates, because they get done in the # background, which upsets the test runner. "update_user_directory": False, - "caches": {"global_factor": 1}, + "caches": {"global_factor": 1, "sync_response_cache_duration": 0}, "listeners": [{"port": 0, "type": "http"}], } @@ -181,90 +198,122 @@ def default_config(name, parse=False): return config_dict -def mock_getRawHeaders(headers=None): +def mock_getRawHeaders(headers=None): # type: ignore[no-untyped-def] headers = headers if headers is not None else {} - def getRawHeaders(name, default=None): + def getRawHeaders(name, default=None): # type: ignore[no-untyped-def] + # If the requested header is present, the real twisted function returns + # List[str] if name is a str and List[bytes] if name is a bytes. + # This mock doesn't support that behaviour. + # Fortunately, none of the current callers of mock_getRawHeaders() provide a + # headers dict, so we don't encounter this discrepancy in practice. return headers.get(name, default) return getRawHeaders +P = ParamSpec("P") + + +@attr.s(slots=True, auto_attribs=True) +class Timer: + absolute_time: float + callback: Callable[[], None] + expired: bool + + +# TODO: Make this generic over a ParamSpec? +@attr.s(slots=True, auto_attribs=True) +class Looper: + func: Callable[..., Any] + interval: float # seconds + last: float + args: Tuple[object, ...] + kwargs: Dict[str, object] + + class MockClock: - now = 1000 + now = 1000.0 - def __init__(self): - # list of lists of [absolute_time, callback, expired] in no particular - # order - self.timers = [] - self.loopers = [] + def __init__(self) -> None: + # Timers in no particular order + self.timers: List[Timer] = [] + self.loopers: List[Looper] = [] - def time(self): + def time(self) -> float: return self.now - def time_msec(self): - return self.time() * 1000 + def time_msec(self) -> int: + return int(self.time() * 1000) - def call_later(self, delay, callback, *args, **kwargs): + def call_later( + self, + delay: float, + callback: Callable[P, object], + *args: P.args, + **kwargs: P.kwargs, + ) -> Timer: ctx = current_context() - def wrapped_callback(): + def wrapped_callback() -> None: set_current_context(ctx) callback(*args, **kwargs) - t = [self.now + delay, wrapped_callback, False] + t = Timer(self.now + delay, wrapped_callback, False) self.timers.append(t) return t - def looping_call(self, function, interval, *args, **kwargs): - self.loopers.append([function, interval / 1000.0, self.now, args, kwargs]) - - def cancel_call_later(self, timer, ignore_errs=False): - if timer[2]: + def looping_call( + self, + function: Callable[P, object], + interval: float, + *args: P.args, + **kwargs: P.kwargs, + ) -> None: + # This type-ignore should be redundant once we use a mypy release with + # https://github.com/python/mypy/pull/12668. + self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs)) # type: ignore[arg-type] + + def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None: + if timer.expired: if not ignore_errs: raise Exception("Cannot cancel an expired timer") - timer[2] = True + timer.expired = True self.timers = [t for t in self.timers if t != timer] # For unit testing - def advance_time(self, secs): + def advance_time(self, secs: float) -> None: self.now += secs timers = self.timers self.timers = [] for t in timers: - time, callback, expired = t - - if expired: + if t.expired: raise Exception("Timer already expired") - if self.now >= time: - t[2] = True - callback() + if self.now >= t.absolute_time: + t.expired = True + t.callback() else: self.timers.append(t) for looped in self.loopers: - func, interval, last, args, kwargs = looped - if last + interval < self.now: - func(*args, **kwargs) - looped[2] = self.now + if looped.last + looped.interval < self.now: + looped.func(*looped.args, **looped.kwargs) + looped.last = self.now - def advance_time_msec(self, ms): + def advance_time_msec(self, ms: float) -> None: self.advance_time(ms / 1000.0) - def time_bound_deferred(self, d, *args, **kwargs): - # We don't bother timing things out for now. - return d - -async def create_room(hs, room_id: str, creator_id: str): +async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: """Creates and persist a creation event for the given room""" persistence_store = hs.get_storage_controllers().persistence + assert persistence_store is not None store = hs.get_datastores().main event_builder_factory = hs.get_event_builder_factory() event_creation_handler = hs.get_event_creation_handler() |