diff options
55 files changed, 801 insertions, 211 deletions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2e4ee723d3..a775f70c4e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -328,51 +328,8 @@ jobs: - arrangement: monolith database: Postgres - steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 - with: - path: synapse - - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh - - - run: | - set -o pipefail - POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt - shell: bash - name: Run Complement Tests - - # We only run the workers tests on `develop` for now, because they're too slow to wait for on PRs. - # Sadly, you can't have an `if` condition on the value of a matrix, so this is a temporary, separate job for now. - # GitHub Actions doesn't support YAML anchors, so it's full-on duplication for now. - complement-developonly: - if: "${{ !failure() && !cancelled() && (github.ref == 'refs/heads/develop') }}" - needs: linting-done - runs-on: ubuntu-latest - - name: "Complement Workers (develop only)" + - arrangement: workers + database: Postgres steps: # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. @@ -406,7 +363,7 @@ jobs: - run: | set -o pipefail - WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt shell: bash name: Run Complement Tests diff --git a/changelog.d/13029.doc b/changelog.d/13029.doc new file mode 100644 index 0000000000..d398f0fdbe --- /dev/null +++ b/changelog.d/13029.doc @@ -0,0 +1 @@ +Add an explanation of the `--report-stats` argument to the docs. diff --git a/changelog.d/13031.feature b/changelog.d/13031.feature new file mode 100644 index 0000000000..fee8e9d1ff --- /dev/null +++ b/changelog.d/13031.feature @@ -0,0 +1 @@ +Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. diff --git a/changelog.d/13077.doc b/changelog.d/13077.doc new file mode 100644 index 0000000000..502f2d059e --- /dev/null +++ b/changelog.d/13077.doc @@ -0,0 +1,3 @@ +Clean up references to sample configuration and redirect users to the configuration manual instead. + + diff --git a/changelog.d/13103.misc b/changelog.d/13103.misc new file mode 100644 index 0000000000..4de5f9e905 --- /dev/null +++ b/changelog.d/13103.misc @@ -0,0 +1 @@ +Add missing type hints to `synapse.logging`. diff --git a/changelog.d/13116.doc b/changelog.d/13116.doc new file mode 100644 index 0000000000..f99be50f44 --- /dev/null +++ b/changelog.d/13116.doc @@ -0,0 +1 @@ +Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. diff --git a/changelog.d/13119.misc b/changelog.d/13119.misc new file mode 100644 index 0000000000..3bb51962e7 --- /dev/null +++ b/changelog.d/13119.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13125.feature b/changelog.d/13125.feature new file mode 100644 index 0000000000..9b0f609541 --- /dev/null +++ b/changelog.d/13125.feature @@ -0,0 +1 @@ +Add a rate limit for local users sending invites. \ No newline at end of file diff --git a/changelog.d/13127.misc b/changelog.d/13127.misc new file mode 100644 index 0000000000..1414811e0a --- /dev/null +++ b/changelog.d/13127.misc @@ -0,0 +1 @@ +Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. \ No newline at end of file diff --git a/changelog.d/13129.misc b/changelog.d/13129.misc new file mode 100644 index 0000000000..4c2dbb7057 --- /dev/null +++ b/changelog.d/13129.misc @@ -0,0 +1 @@ +Only one-line SQL statements for logging and tracing. diff --git a/changelog.d/13134.misc b/changelog.d/13134.misc new file mode 100644 index 0000000000..e3e16056d1 --- /dev/null +++ b/changelog.d/13134.misc @@ -0,0 +1 @@ +Apply ratelimiting earlier in processing of /send request. \ No newline at end of file diff --git a/changelog.d/13135.misc b/changelog.d/13135.misc new file mode 100644 index 0000000000..f096dd8749 --- /dev/null +++ b/changelog.d/13135.misc @@ -0,0 +1 @@ +Enforce type annotations for `tests.test_server`. diff --git a/changelog.d/13139.doc b/changelog.d/13139.doc new file mode 100644 index 0000000000..f5d99d461a --- /dev/null +++ b/changelog.d/13139.doc @@ -0,0 +1 @@ +Add a link to the configuration manual from the homeserver sample config documentation. diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index 773c7db22f..cc6482f763 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -59,6 +59,9 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then synchrotron, \ appservice, \ pusher" + + # Improve startup times by using a launcher based on fork() + export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1 else # Empty string here means 'main process only' export SYNAPSE_WORKER_TYPES="" diff --git a/docker/conf-workers/synapse.supervisord.conf.j2 b/docker/conf-workers/synapse.supervisord.conf.j2 index 6443450491..481eb4fc92 100644 --- a/docker/conf-workers/synapse.supervisord.conf.j2 +++ b/docker/conf-workers/synapse.supervisord.conf.j2 @@ -1,3 +1,24 @@ +{% if use_forking_launcher %} +[program:synapse_fork] +command=/usr/local/bin/python -m synapse.app.complement_fork_starter + {{ main_config_path }} + synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + {%- for worker in workers %} + -- {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml + {%- endfor %} +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + +{% else %} [program:synapse_main] command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver --config-path="{{ main_config_path }}" @@ -13,7 +34,7 @@ autorestart=unexpected exitcodes=0 -{% for worker in workers %} + {% for worker in workers %} [program:synapse_{{ worker.name }}] command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }} --config-path="{{ main_config_path }}" @@ -27,4 +48,5 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 -{% endfor %} + {% endfor %} +{% endif %} diff --git a/docker/conf/log.config b/docker/conf/log.config index dc8c70befd..d9e85aa533 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -2,7 +2,11 @@ version: 1 formatters: precise: + {% if include_worker_name_in_log_line %} + format: '{{ worker_name }} | %(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% else %} format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% endif %} handlers: {% if LOG_FILE_PATH %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 2134b648d5..4521f99eb4 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -26,6 +26,9 @@ # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. +# * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, +# only intended for usage in Complement at the moment. +# No stability guarantees are provided. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -525,6 +528,7 @@ def generate_worker_files( "/etc/supervisor/conf.d/synapse.conf", workers=worker_descriptors, main_config_path=config_path, + use_forking_launcher=environ.get("SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"), ) # healthcheck config @@ -560,6 +564,9 @@ def generate_worker_log_config( log_config_filepath, worker_name=worker_name, **extra_log_template_args, + include_worker_name_in_log_line=environ.get( + "SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER" + ), ) return log_config_filepath diff --git a/docker/start.py b/docker/start.py index 4ac8f03477..5a98dce551 100755 --- a/docker/start.py +++ b/docker/start.py @@ -110,7 +110,11 @@ def generate_config_from_template( log_config_file = environ["SYNAPSE_LOG_CONFIG"] log("Generating log config file " + log_config_file) - convert("/conf/log.config", log_config_file, environ) + convert( + "/conf/log.config", + log_config_file, + {**environ, "include_worker_name_in_log_line": False}, + ) # Hopefully we already have a signing key, but generate one if not. args = [ diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 62f89e8cba..1235f1cb95 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -124,9 +124,8 @@ Body parameters: - `address` - string. Value of third-party ID. belonging to a user. - `external_ids` - array, optional. Allow setting the identifier of the external identity - provider for SSO (Single sign-on). Details in - [Sample Configuration File](../usage/configuration/homeserver_sample_config.html) - section `sso` and `oidc_providers`. + provider for SSO (Single sign-on). Details in the configuration manual under the + sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers). - `auth_provider` - string. ID of the external identity provider. Value of `idp_id` in the homeserver configuration. Note that no error is raised if the provided value is not in the homeserver configuration. diff --git a/docs/code_style.md b/docs/code_style.md index db7edcd76b..d65fda62d1 100644 --- a/docs/code_style.md +++ b/docs/code_style.md @@ -70,82 +70,61 @@ on save as they take a while and can be very resource intensive. - Avoid wildcard imports (`from synapse.types import *`) and relative imports (`from .types import UserID`). -## Configuration file format +## Configuration code and documentation format -The [sample configuration file](./sample_config.yaml) acts as a +When adding a configuration option to the code, if several settings are grouped into a single dict, ensure that your code +correctly handles the top-level option being set to `None` (as it will be if no sub-options are enabled). + +The [configuration manual](usage/configuration/config_documentation.md) acts as a reference to Synapse's configuration options for server administrators. Remember that many readers will be unfamiliar with YAML and server -administration in general, so that it is important that the file be as -easy to understand as possible, which includes following a consistent -format. +administration in general, so it is important that when you add +a configuration option the documentation be as easy to understand as possible, which +includes following a consistent format. Some guidelines follow: -- Sections should be separated with a heading consisting of a single - line prefixed and suffixed with `##`. There should be **two** blank - lines before the section header, and **one** after. -- Each option should be listed in the file with the following format: - - A comment describing the setting. Each line of this comment - should be prefixed with a hash (`#`) and a space. +- Each option should be listed in the config manual with the following format: + + - The name of the option, prefixed by `###`. - The comment should describe the default behaviour (ie, what + - A comment which describes the default behaviour (i.e. what happens if the setting is omitted), as well as what the effect will be if the setting is changed. - - Often, the comment end with something like "uncomment the - following to <do action>". - - - A line consisting of only `#`. - - A commented-out example setting, prefixed with only `#`. + - An example setting, using backticks to define the code block For boolean (on/off) options, convention is that this example - should be the *opposite* to the default (so the comment will end - with "Uncomment the following to enable [or disable] - <feature>." For other options, the example should give some - non-default value which is likely to be useful to the reader. - -- There should be a blank line between each option. -- Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in - comment lines starting `# #`, as this is hard to read and confusing - to edit. Instead, leave the top-level config option uncommented, and - follow the conventions above for sub-options. Ensure that your code - correctly handles the top-level option being set to `None` (as it - will be if no sub-options are enabled). -- Lines should be wrapped at 80 characters. -- Use two-space indents. -- `true` and `false` are spelt thus (as opposed to `True`, etc.) -- Use single quotes (`'`) rather than double-quotes (`"`) or backticks - (`` ` ``) to refer to configuration options. + should be the *opposite* to the default. For other options, the example should give + some non-default value which is likely to be useful to the reader. + +- There should be a horizontal rule between each option, which can be achieved by adding `---` before and + after the option. +- `true` and `false` are spelt thus (as opposed to `True`, etc.) Example: +--- +### `modules` + +Use the `module` sub-option to add a module under `modules` to extend functionality. +The `module` setting then has a sub-option, `config`, which can be used to define some configuration +for the `module`. + +Defaults to none. + +Example configuration: ```yaml -## Frobnication ## - -# The frobnicator will ensure that all requests are fully frobnicated. -# To enable it, uncomment the following. -# -#frobnicator_enabled: true - -# By default, the frobnicator will frobnicate with the default frobber. -# The following will make it use an alternative frobber. -# -#frobincator_frobber: special_frobber - -# Settings for the frobber -# -frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} ``` +--- Note that the sample configuration is generated from the synapse code and is maintained by a script, `scripts-dev/generate_sample_config.sh`. Making sure that the output from this script matches the desired format is left as an exercise for the reader! + diff --git a/docs/jwt.md b/docs/jwt.md index 8f859d59a6..2e262583a7 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -49,9 +49,8 @@ as follows: * For other installation mechanisms, see the documentation provided by the maintainer. -To enable the JSON web token integration, you should then add a `jwt_config` section -to your configuration file (or uncomment the `enabled: true` line in the -existing section). See [sample_config.yaml](./sample_config.yaml) for some +To enable the JSON web token integration, you should then add a `jwt_config` option +to your configuration file. See the [configuration manual](usage/configuration/config_documentation.md#jwt_config) for some sample settings. ## How to test JWT as a developer diff --git a/docs/manhole.md b/docs/manhole.md index a82fad0f0f..4e5bf833ce 100644 --- a/docs/manhole.md +++ b/docs/manhole.md @@ -13,8 +13,10 @@ environments where untrusted users have shell access. ## Configuring the manhole -To enable it, first uncomment the `manhole` listener configuration in -`homeserver.yaml`. The configuration is slightly different if you're using docker. +To enable it, first add the `manhole` listener configuration in your +`homeserver.yaml`. You can find information on how to do that +in the [configuration manual](usage/configuration/config_documentation.md#manhole_settings). +The configuration is slightly different if you're using docker. #### Docker config diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index b52c4aaa24..8c88f93935 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -49,9 +49,9 @@ clients. ## Server configuration -Support for this feature can be enabled and configured in the -`retention` section of the Synapse configuration file (see the -[sample file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518)). +Support for this feature can be enabled and configured by adding a the +`retention` in the Synapse configuration file (see +[configuration manual](usage/configuration/config_documentation.md#retention)). To enable support for message retention policies, set the setting `enabled` in this section to `true`. @@ -65,8 +65,8 @@ message retention policy configured in its state. This allows server admins to ensure that messages are never kept indefinitely in a server's database. -A default policy can be defined as such, in the `retention` section of -the configuration file: +A default policy can be defined as such, by adding the `retention` option in +the configuration file and adding these sub-options: ```yaml default_policy: @@ -86,8 +86,8 @@ Purge jobs are the jobs that Synapse runs in the background to purge expired events from the database. They are only run if support for message retention policies is enabled in the server's configuration. If no configuration for purge jobs is configured by the server admin, -Synapse will use a default configuration, which is described in the -[sample configuration file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518). +Synapse will use a default configuration, which is described here in the +[configuration manual](usage/configuration/config_documentation.md#retention). Some server admins might want a finer control on when events are removed depending on an event's room's policy. This can be done by setting the @@ -137,8 +137,8 @@ the server's database. ### Lifetime limits Server admins can set limits on the values of `max_lifetime` to use when -purging old events in a room. These limits can be defined as such in the -`retention` section of the configuration file: +purging old events in a room. These limits can be defined under the +`retention` option in the configuration file: ```yaml allowed_lifetime_min: 1d diff --git a/docs/openid.md b/docs/openid.md index 9d615a5737..d0ccf36f71 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -45,8 +45,8 @@ as follows: maintainer. To enable the OpenID integration, you should then add a section to the `oidc_providers` -setting in your configuration file (or uncomment one of the existing examples). -See [sample_config.yaml](./sample_config.yaml) for some sample settings, as well as +setting in your configuration file. +See the [configuration manual](usage/configuration/config_documentation.md#oidc_providers) for some sample settings, as well as the text below for example configurations for specific providers. ## Sample configs diff --git a/docs/setup/forward_proxy.md b/docs/setup/forward_proxy.md index 494c14893b..3482691f83 100644 --- a/docs/setup/forward_proxy.md +++ b/docs/setup/forward_proxy.md @@ -66,8 +66,8 @@ in Synapse can be deactivated. **NOTE**: This has an impact on security and is for testing purposes only! -To deactivate the certificate validation, the following setting must be made in -[homserver.yaml](../usage/configuration/homeserver_sample_config.md). +To deactivate the certificate validation, the following setting must be added to +your [homserver.yaml](../usage/configuration/homeserver_sample_config.md). ```yaml use_insecure_ssl_client_just_for_testing_do_not_use: true diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 5bdefe2bc1..260e50577b 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -232,7 +232,9 @@ python -m synapse.app.homeserver \ --report-stats=[yes|no] ``` -... substituting an appropriate value for `--server-name`. +... substituting an appropriate value for `--server-name` and choosing whether +or not to report usage statistics (hostname, Synapse version, uptime, total +users, etc.) to the developers via the `--report-stats` argument. This command will generate you a config file that you can then customise, but it will also generate a set of keys for you. These keys will allow your homeserver to @@ -405,11 +407,11 @@ The recommended way to do so is to set up a reverse proxy on port Alternatively, you can configure Synapse to expose an HTTPS port. To do so, you will need to edit `homeserver.yaml`, as follows: -- First, under the `listeners` section, uncomment the configuration for the - TLS-enabled listener. (Remove the hash sign (`#`) at the start of - each line). The relevant lines are like this: +- First, under the `listeners` option, add the configuration for the + TLS-enabled listener like so: ```yaml +listeners: - port: 8448 type: http tls: true @@ -417,9 +419,11 @@ so, you will need to edit `homeserver.yaml`, as follows: - names: [client, federation] ``` -- You will also need to uncomment the `tls_certificate_path` and - `tls_private_key_path` lines under the `TLS` section. You will need to manage - provisioning of these certificates yourself. +- You will also need to add the options `tls_certificate_path` and + `tls_private_key_path`. to your configuration file. You will need to manage provisioning of + these certificates yourself. +- You can find more information about these options as well as how to configure synapse in the + [configuration manual](../usage/configuration/config_documentation.md). If you are using your own certificate, be sure to use a `.pem` file that includes the full certificate chain including any intermediate certificates diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 58a74ace48..82edd53e36 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -317,7 +317,7 @@ Example configuration: allow_public_rooms_without_auth: true ``` --- -### `allow_public_rooms_without_auth` +### `allow_public_rooms_over_federation` If set to true, allows any other homeserver to fetch the server's public rooms directory via federation. Defaults to false. @@ -2999,7 +2999,7 @@ This setting has the following sub-options: * `localdb_enabled`: Set to false to disable authentication against the local password database. This is ignored if `enabled` is false, and is only useful if you have other `password_providers`. Defaults to true. -* `pepper`: Set the value here to a secret random string for extra security. # Uncomment and change to a secret random string for extra security. +* `pepper`: Set the value here to a secret random string for extra security. DO NOT CHANGE THIS AFTER INITIAL SETUP! * `policy`: Define and enforce a password policy, such as minimum lengths for passwords, etc. Each parameter is optional. This is an implementation of MSC2000. Parameters are as follows: diff --git a/docs/usage/configuration/homeserver_sample_config.md b/docs/usage/configuration/homeserver_sample_config.md index 11e806998d..2dbfb35baa 100644 --- a/docs/usage/configuration/homeserver_sample_config.md +++ b/docs/usage/configuration/homeserver_sample_config.md @@ -9,6 +9,9 @@ a real homeserver.yaml. Instead, if you are starting from scratch, please genera a fresh config using Synapse by following the instructions in [Installation](../../setup/installation.md). +Documentation for all configuration options can be found in the +[Configuration Manual](./config_documentation.md). + ```yaml {{#include ../../sample_config.yaml}} ``` diff --git a/docs/usage/configuration/user_authentication/single_sign_on/cas.md b/docs/usage/configuration/user_authentication/single_sign_on/cas.md index 3bac1b29f0..899face876 100644 --- a/docs/usage/configuration/user_authentication/single_sign_on/cas.md +++ b/docs/usage/configuration/user_authentication/single_sign_on/cas.md @@ -4,5 +4,5 @@ Synapse supports authenticating users via the [Central Authentication Service protocol](https://en.wikipedia.org/wiki/Central_Authentication_Service) (CAS) natively. -Please see the `cas_config` and `sso` sections of the [Synapse configuration -file](../../../configuration/homeserver_sample_config.md) for more details. \ No newline at end of file +Please see the [cas_config](../../../configuration/config_documentation.md#cas_config) and [sso](../../../configuration/config_documentation.md#sso) +sections of the configuration manual for more details. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 4b08f45c6d..b9b16860db 100644 --- a/mypy.ini +++ b/mypy.ini @@ -56,7 +56,6 @@ exclude = (?x) |tests/server.py |tests/server_notices/test_resource_limits_server_notices.py |tests/test_metrics.py - |tests/test_server.py |tests/test_state.py |tests/test_terms_auth.py |tests/util/caches/test_cached_call.py @@ -89,9 +88,6 @@ disallow_untyped_defs = False [mypy-synapse.logging.opentracing] disallow_untyped_defs = False -[mypy-synapse.logging.scopecontextmanager] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 7b96f61d7b..d3b4887f69 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -268,6 +268,9 @@ class MockHomeserver: def get_instance_name(self) -> str: return "master" + def should_send_federation(self) -> bool: + return False + class Porter: def __init__( diff --git a/synapse/api/constants.py b/synapse/api/constants.py index e1d31cabed..2653764119 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -259,3 +259,13 @@ class ReceiptTypes: READ: Final = "m.read" READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" + + +class PublicRoomsFilterFields: + """Fields in the search filter for `/publicRooms` that we understand. + + As defined in https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3publicrooms + """ + + GENERIC_SEARCH_TERM: Final = "generic_search_term" + ROOM_TYPES: Final = "org.matrix.msc3827.room_types" diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 363ac98ea9..923891ae0d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -106,7 +106,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs) def start_worker_reactor( appname: str, config: HomeServerConfig, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process @@ -141,7 +143,9 @@ def start_reactor( daemonize: bool, print_pidfile: bool, logger: logging.Logger, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py new file mode 100644 index 0000000000..89eb07df27 --- /dev/null +++ b/synapse/app/complement_fork_starter.py @@ -0,0 +1,190 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ## What this script does +# +# This script spawns multiple workers, whilst only going through the code loading +# process once. The net effect is that start-up time for a swarm of workers is +# reduced, particularly in CPU-constrained environments. +# +# Before the workers are spawned, the database is prepared in order to avoid the +# workers racing. +# +# ## Stability +# +# This script is only intended for use within the Synapse images for the +# Complement test suite. +# There are currently no stability guarantees whatsoever; especially not about: +# - whether it will continue to exist in future versions; +# - the format of its command-line arguments; or +# - any details about its behaviour or principles of operation. +# +# ## Usage +# +# The first argument should be the path to the database configuration, used to +# set up the database. The rest of the arguments are used as follows: +# Each worker is specified as an argument group (each argument group is +# separated by '--'). +# The first argument in each argument group is the Python module name of the application +# to start. Further arguments are then passed to that module as-is. +# +# ## Example +# +# python -m synapse.app.complement_fork_starter path_to_db_config.yaml \ +# synapse.app.homeserver [args..] -- \ +# synapse.app.generic_worker [args..] -- \ +# ... +# synapse.app.generic_worker [args..] +# +import argparse +import importlib +import itertools +import multiprocessing +import sys +from typing import Any, Callable, List + +from twisted.internet.main import installReactor + + +class ProxiedReactor: + """ + Twisted tracks the 'installed' reactor as a global variable. + (Actually, it does some module trickery, but the effect is similar.) + + The default EpollReactor is buggy if it's created before a process is + forked, then used in the child. + See https://twistedmatrix.com/trac/ticket/4759#comment:17. + + However, importing certain Twisted modules will automatically create and + install a reactor if one hasn't already been installed. + It's not normally possible to re-install a reactor. + + Given the goal of launching workers with fork() to only import the code once, + this presents a conflict. + Our work around is to 'install' this ProxiedReactor which prevents Twisted + from creating and installing one, but which lets us replace the actual reactor + in use later on. + """ + + def __init__(self) -> None: + self.___reactor_target: Any = None + + def _install_real_reactor(self, new_reactor: Any) -> None: + """ + Install a real reactor for this ProxiedReactor to forward lookups onto. + + This method is specific to our ProxiedReactor and should not clash with + any names used on an actual Twisted reactor. + """ + self.___reactor_target = new_reactor + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self.___reactor_target, attr_name) + + +def _worker_entrypoint( + func: Callable[[], None], proxy_reactor: ProxiedReactor, args: List[str] +) -> None: + """ + Entrypoint for a forked worker process. + + We just need to set up the command-line arguments, create our real reactor + and then kick off the worker's main() function. + """ + + sys.argv = args + + from twisted.internet.epollreactor import EPollReactor + + proxy_reactor._install_real_reactor(EPollReactor()) + func() + + +def main() -> None: + """ + Entrypoint for the forking launcher. + """ + parser = argparse.ArgumentParser() + parser.add_argument("db_config", help="Path to database config file") + parser.add_argument( + "args", + nargs="...", + help="Argument groups separated by `--`. " + "The first argument of each group is a Synapse app name. " + "Subsequent arguments are passed through.", + ) + ns = parser.parse_args() + + # Split up the subsequent arguments into each workers' arguments; + # `--` is our delimiter of choice. + args_by_worker: List[List[str]] = [ + list(args) + for cond, args in itertools.groupby(ns.args, lambda ele: ele != "--") + if cond and args + ] + + # Prevent Twisted from installing a shared reactor that all the workers will + # inherit when we fork(), by installing our own beforehand. + proxy_reactor = ProxiedReactor() + installReactor(proxy_reactor) + + # Import the entrypoints for all the workers. + worker_functions = [] + for worker_args in args_by_worker: + worker_module = importlib.import_module(worker_args[0]) + worker_functions.append(worker_module.main) + + # We need to prepare the database first as otherwise all the workers will + # try to create a schema version table and some will crash out. + from synapse._scripts import update_synapse_database + + update_proc = multiprocessing.Process( + target=_worker_entrypoint, + args=( + update_synapse_database.main, + proxy_reactor, + [ + "update_synapse_database", + "--database-config", + ns.db_config, + "--run-background-updates", + ], + ), + ) + print("===== PREPARING DATABASE =====", file=sys.stderr) + update_proc.start() + update_proc.join() + print("===== PREPARED DATABASE =====", file=sys.stderr) + + # At this point, we've imported all the main entrypoints for all the workers. + # Now we basically just fork() out to create the workers we need. + # Because we're using fork(), all the workers get a clone of this launcher's + # memory space and don't need to repeat the work of loading the code! + # Instead of using fork() directly, we use the multiprocessing library, + # which uses fork() on Unix platforms. + processes = [] + for (func, worker_args) in zip(worker_functions, args_by_worker): + process = multiprocessing.Process( + target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) + ) + process.start() + processes.append(process) + + # Be a good parent and wait for our children to die before exiting. + for process in processes: + process.join() + + +if __name__ == "__main__": + main() diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index c82f3ee7a3..6e11fbdb9a 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -145,7 +145,7 @@ class EmailConfig(Config): raise ConfigError( 'The config option "trust_identity_server_for_password_resets" ' 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " + "Please consult the configuration manual at docs/usage/configuration/config_documentation.md for " "details and update your config file." ) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 0a285dba31..ee443cea00 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -87,3 +87,6 @@ class ExperimentalConfig(Config): # MSC3715: dir param on /relations. self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) + + # MSC3827: Filtering of /publicRooms by room type + self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index d4090a1f9a..4fc1784efe 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -136,6 +136,11 @@ class RatelimitConfig(Config): defaults={"per_second": 0.003, "burst_count": 5}, ) + self.rc_invites_per_issuer = RateLimitConfig( + config.get("rc_invites", {}).get("per_issuer", {}), + defaults={"per_second": 0.3, "burst_count": 10}, + ) + self.rc_third_party_invite = RateLimitConfig( config.get("rc_third_party_invite", {}), defaults={ diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 189f52fe5a..c6b40a5b7a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -903,6 +903,9 @@ class EventCreationHandler: await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() + if ratelimit: + await self.request_ratelimiter.ratelimit(requester, update=False) + # We limit the number of concurrent event sends in a room so that we # don't fork the DAG too much. If we don't limit then we can end up in # a situation where event persistence can't keep up, causing diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 183d4ae3c4..29868eb743 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -25,6 +25,7 @@ from synapse.api.constants import ( GuestAccess, HistoryVisibility, JoinRules, + PublicRoomsFilterFields, ) from synapse.api.errors import ( Codes, @@ -181,6 +182,7 @@ class RoomListHandler: == HistoryVisibility.WORLD_READABLE, "guest_can_join": room["guest_access"] == "can_join", "join_rule": room["join_rules"], + "org.matrix.msc3827.room_type": room["room_type"], } # Filter out Nones – rather omit the field altogether @@ -239,7 +241,9 @@ class RoomListHandler: response["chunk"] = results response["total_room_count_estimate"] = await self.store.count_public_rooms( - network_tuple, ignore_non_federatable=from_federation + network_tuple, + ignore_non_federatable=from_federation, + search_filter=search_filter, ) return response @@ -508,8 +512,21 @@ class RoomListNextBatch: def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool: - if search_filter and search_filter.get("generic_search_term", None): - generic_search_term = search_filter["generic_search_term"].upper() + """Determines whether the given search filter matches a room entry returned over + federation. + + Only used if the remote server does not support MSC2197 remote-filtered search, and + hence does not support MSC3827 filtering of `/publicRooms` by room type either. + + In this case, we cannot apply the `room_type` filter since no `room_type` field is + returned. + """ + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + generic_search_term = search_filter[ + PublicRoomsFilterFields.GENERIC_SEARCH_TERM + ].upper() if generic_search_term in room_entry.get("name", "").upper(): return True elif generic_search_term in room_entry.get("topic", "").upper(): diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index bf6bae1232..5648ab4bf4 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -101,19 +101,33 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count, ) + # Ratelimiter for invites, keyed by room (across all issuers, all + # recipients). self._invites_per_room_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_room.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_room.burst_count, ) - self._invites_per_user_limiter = Ratelimiter( + + # Ratelimiter for invites, keyed by recipient (across all rooms, all + # issuers). + self._invites_per_recipient_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_user.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count, ) + # Ratelimiter for invites, keyed by issuer (across all rooms, all + # recipients). + self._invites_per_issuer_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_invites_per_issuer.per_second, + burst_count=hs.config.ratelimiting.rc_invites_per_issuer.burst_count, + ) + self._third_party_invite_limiter = Ratelimiter( store=self.store, clock=self.clock, @@ -258,7 +272,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if room_id: await self._invites_per_room_limiter.ratelimit(requester, room_id) - await self._invites_per_user_limiter.ratelimit(requester, invitee_user_id) + await self._invites_per_recipient_limiter.ratelimit(requester, invitee_user_id) + if requester is not None: + await self._invites_per_issuer_limiter.ratelimit(requester) async def _local_membership_update( self, diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index f45e06eb0e..5c01482acf 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -271,6 +271,9 @@ class StatsHandler: room_state["is_federatable"] = ( event_content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event_content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 903ec40c86..50c57940f9 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -164,6 +164,7 @@ Gotchas with an active span? """ import contextlib +import enum import inspect import logging import re @@ -268,7 +269,7 @@ try: _reporter: Reporter = attr.Factory(Reporter) - def set_process(self, *args, **kwargs): + def set_process(self, *args: Any, **kwargs: Any) -> None: return self._reporter.set_process(*args, **kwargs) def report_span(self, span: "opentracing.Span") -> None: @@ -319,7 +320,11 @@ _homeserver_whitelist: Optional[Pattern[str]] = None # Util methods -Sentinel = object() + +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup. + sentinel = object() P = ParamSpec("P") @@ -339,12 +344,12 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message, ret=None): +def ensure_active_span(message: str, ret=None): """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: - message (str): Message which fills in "There was no active span when trying to %s" + message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. ret (object): return value if opentracing is None or there is no active span. @@ -402,7 +407,7 @@ def init_tracer(hs: "HomeServer") -> None: config = JaegerConfig( config=hs.config.tracing.jaeger_config, service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", - scope_manager=LogContextScopeManager(hs.config), + scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) @@ -451,15 +456,15 @@ def whitelisted_homeserver(destination: str) -> bool: # Could use kwargs but I want these to be explicit def start_active_span( - operation_name, - child_of=None, - references=None, - tags=None, - start_time=None, - ignore_active_span=False, - finish_on_close=True, + operation_name: str, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, + references: Optional[List["opentracing.Reference"]] = None, + tags: Optional[Dict[str, str]] = None, + start_time: Optional[float] = None, + ignore_active_span: bool = False, + finish_on_close: bool = True, *, - tracer=None, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span. @@ -493,11 +498,11 @@ def start_active_span( def start_active_span_follows_from( operation_name: str, contexts: Collection, - child_of=None, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, start_time: Optional[float] = None, *, - inherit_force_tracing=False, - tracer=None, + inherit_force_tracing: bool = False, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span, with additional references to previous spans @@ -540,7 +545,7 @@ def start_active_span_from_edu( edu_content: Dict[str, Any], operation_name: str, references: Optional[List["opentracing.Reference"]] = None, - tags: Optional[Dict] = None, + tags: Optional[Dict[str, str]] = None, start_time: Optional[float] = None, ignore_active_span: bool = False, finish_on_close: bool = True, @@ -617,23 +622,27 @@ def set_operation_name(operation_name: str) -> None: @only_if_tracing -def force_tracing(span=Sentinel) -> None: +def force_tracing( + span: Union["opentracing.Span", _Sentinel] = _Sentinel.sentinel +) -> None: """Force sampling for the active/given span and its children. Args: span: span to force tracing for. By default, the active span. """ - if span is Sentinel: - span = opentracing.tracer.active_span - if span is None: + if isinstance(span, _Sentinel): + span_to_trace = opentracing.tracer.active_span + else: + span_to_trace = span + if span_to_trace is None: logger.error("No active span in force_tracing") return - span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) + span_to_trace.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) # also set a bit of baggage, so that we have a way of figuring out if # it is enabled later - span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") + span_to_trace.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") def is_context_forced_tracing( @@ -789,7 +798,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte # Tracing decorators -def trace(func=None, opname=None): +def trace(func=None, opname: Optional[str] = None): """ Decorator to trace a function. Sets the operation name to that of the function's or that given @@ -822,11 +831,11 @@ def trace(func=None, opname=None): result = func(*args, **kwargs) if isinstance(result, defer.Deferred): - def call_back(result): + def call_back(result: R) -> R: scope.__exit__(None, None, None) return result - def err_back(result): + def err_back(result: R) -> R: scope.__exit__(None, None, None) return result diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index a26a1a58e7..10877bdfc5 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -16,11 +16,15 @@ import logging from types import TracebackType from typing import Optional, Type -from opentracing import Scope, ScopeManager +from opentracing import Scope, ScopeManager, Span import twisted -from synapse.logging.context import current_context, nested_logging_context +from synapse.logging.context import ( + LoggingContext, + current_context, + nested_logging_context, +) logger = logging.getLogger(__name__) @@ -35,11 +39,11 @@ class LogContextScopeManager(ScopeManager): but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301. """ - def __init__(self, config): + def __init__(self) -> None: pass @property - def active(self): + def active(self) -> Optional[Scope]: """ Returns the currently active Scope which can be used to access the currently active Scope.span. @@ -48,19 +52,18 @@ class LogContextScopeManager(ScopeManager): Tracer.start_active_span() time. Return: - (Scope) : the Scope that is active, or None if not - available. + The Scope that is active, or None if not available. """ ctx = current_context() return ctx.scope - def activate(self, span, finish_on_close): + def activate(self, span: Span, finish_on_close: bool) -> Scope: """ Makes a Span active. Args - span (Span): the span that should become active. - finish_on_close (Boolean): whether Span should be automatically - finished when Scope.close() is called. + span: the span that should become active. + finish_on_close: whether Span should be automatically finished when + Scope.close() is called. Returns: Scope to control the end of the active period for @@ -112,8 +115,8 @@ class _LogContextScope(Scope): def __init__( self, manager: LogContextScopeManager, - span, - logcontext, + span: Span, + logcontext: LoggingContext, enter_logcontext: bool, finish_on_close: bool, ): @@ -121,13 +124,13 @@ class _LogContextScope(Scope): Args: manager: the manager that is responsible for this scope. - span (Span): + span: the opentracing span which this scope represents the local lifetime for. - logcontext (LogContext): - the logcontext to which this scope is attached. + logcontext: + the log context to which this scope is attached. enter_logcontext: - if True the logcontext will be exited when the scope is finished + if True the log context will be exited when the scope is finished finish_on_close: if True finish the span when the scope is closed """ diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c1bd775fec..f4f06563dd 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,6 +95,8 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285": self.config.experimental.msc2285_enabled, + # Supports filtering of /publicRooms by room type MSC3827 + "org.matrix.msc3827": self.config.experimental.msc3827_enabled, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e8c63cf567..e21ab08515 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -366,10 +366,11 @@ class LoggingTransaction: *args: P.args, **kwargs: P.kwargs, ) -> R: - sql = self._make_sql_one_line(sql) + # Generate a one-line version of the SQL to better log it. + one_line_sql = self._make_sql_one_line(sql) # TODO(paul): Maybe use 'info' and 'debug' for values? - sql_logger.debug("[SQL] {%s} %s", self.name, sql) + sql_logger.debug("[SQL] {%s} %s", self.name, one_line_sql) sql = self.database_engine.convert_param_style(sql) if args: @@ -389,7 +390,7 @@ class LoggingTransaction: "db.query", tags={ opentracing.tags.DATABASE_TYPE: "sql", - opentracing.tags.DATABASE_STATEMENT: sql, + opentracing.tags.DATABASE_STATEMENT: one_line_sql, }, ): return func(sql, *args, **kwargs) diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 57aaf778ec..a3d31d3737 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -87,7 +87,6 @@ class DataStore( RoomStore, RoomBatchStore, RegistrationStore, - StreamWorkerStore, ProfileStore, PresenceStore, TransactionWorkerStore, @@ -112,6 +111,7 @@ class DataStore( SearchStore, TagsStore, AccountDataStore, + StreamWorkerStore, OpenIdStore, ClientIpWorkerStore, DeviceStore, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 7d4754b3d3..f432d578b5 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -25,8 +25,8 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -122,7 +122,7 @@ def _deserialize_action(actions: str, is_highlight: bool) -> List[Union[dict, st return DEFAULT_NOTIF_ACTION -class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBaseStore): +class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ -218,7 +218,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas retcol="event_id", ) - stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) # type: ignore[attr-defined] + stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) return self._get_unread_counts_by_pos_txn( txn, room_id, user_id, stream_ordering @@ -307,12 +307,22 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas actions that have been deleted from `event_push_actions` table. """ + # If there have been no events in the room since the stream ordering, + # there can't be any push actions either. + if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): + return 0, 0 + clause = "" args = [user_id, room_id, stream_ordering] if max_stream_ordering is not None: clause = "AND ea.stream_ordering <= ?" args.append(max_stream_ordering) + # If the max stream ordering is less than the min stream ordering, + # then obviously there are zero push actions in that range. + if max_stream_ordering <= stream_ordering: + return 0, 0 + sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 5760d3428e..d8026e3fac 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -32,12 +32,17 @@ from typing import ( import attr -from synapse.api.constants import EventContentFields, EventTypes, JoinRules +from synapse.api.constants import ( + EventContentFields, + EventTypes, + JoinRules, + PublicRoomsFilterFields, +) from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.events import EventBase -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -199,10 +204,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): desc="get_public_room_ids", ) + def _construct_room_type_where_clause( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[Union[str, None], List[str]]: + if not room_types or not self.config.experimental.msc3827_enabled: + return None, [] + else: + # We use None when we want get rooms without a type + is_null_clause = "" + if None in room_types: + is_null_clause = "OR room_type IS NULL" + room_types = [value for value in room_types if value is not None] + + list_clause, args = make_in_list_sql_clause( + self.database_engine, "room_type", room_types + ) + + return f"({list_clause} {is_null_clause})", args + async def count_public_rooms( self, network_tuple: Optional[ThirdPartyInstanceID], ignore_non_federatable: bool, + search_filter: Optional[dict], ) -> int: """Counts the number of public rooms as tracked in the room_stats_current and room_stats_state table. @@ -210,11 +234,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): Args: network_tuple ignore_non_federatable: If true filters out non-federatable rooms + search_filter """ def _count_public_rooms_txn(txn: LoggingTransaction) -> int: query_args = [] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + room_type_clause = f" AND {room_type_clause}" if room_type_clause else "" + query_args += args + if network_tuple: if network_tuple.appservice_id: published_sql = """ @@ -249,6 +282,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) + {room_type_clause} AND joined_members > 0 """ @@ -347,8 +381,12 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): if ignore_non_federatable: where_clauses.append("is_federatable") - if search_filter and search_filter.get("generic_search_term", None): - search_term = "%" + search_filter["generic_search_term"] + "%" + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + search_term = ( + "%" + search_filter[PublicRoomsFilterFields.GENERIC_SEARCH_TERM] + "%" + ) where_clauses.append( """ @@ -365,6 +403,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): search_term.lower(), ] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + if room_type_clause: + where_clauses.append(room_type_clause) + query_args += args + where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) @@ -373,7 +420,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): sql = f""" SELECT room_id, name, topic, canonical_alias, joined_members, - avatar, history_visibility, guest_access, join_rules + avatar, history_visibility, guest_access, join_rules, room_type FROM ( {published_sql} ) published @@ -1166,6 +1213,7 @@ class _BackgroundUpdates: POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" + ADD_ROOM_TYPE_COLUMN = "add_room_type_column" _REPLACE_ROOM_DEPTH_SQL_COMMANDS = ( @@ -1200,6 +1248,11 @@ class RoomBackgroundUpdateStore(SQLBaseStore): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + self._background_add_room_type_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1569,6 +1622,69 @@ class RoomBackgroundUpdateStore(SQLBaseStore): return batch_size + async def _background_add_room_type_column( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to go and add room_type information to `room_stats_state` + table from `event_json` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_add_room_type_column_txn( + txn: LoggingTransaction, + ) -> bool: + sql = """ + SELECT state.room_id, json FROM event_json + INNER JOIN current_state_events AS state USING (event_id) + WHERE state.room_id > ? AND type = 'm.room.create' + ORDER BY state.room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + room_id_to_create_event_results = txn.fetchall() + + new_last_room_id = None + for room_id, event_json in room_id_to_create_event_results: + event_dict = db_to_json(event_json) + + room_type = event_dict.get("content", {}).get( + EventContentFields.ROOM_TYPE, None + ) + if isinstance(room_type, str): + self.db_pool.simple_update_txn( + txn, + table="room_stats_state", + keyvalues={"room_id": room_id}, + updatevalues={"room_type": room_type}, + ) + + new_last_room_id = room_id + + if new_last_room_id is None: + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_add_room_type_column", + _background_add_room_type_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN + ) + + return batch_size + class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): def __init__( diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 82851ffa95..b4c652acf3 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,7 +16,7 @@ import logging from enum import Enum from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from typing_extensions import Counter @@ -238,6 +238,7 @@ class StatsStore(StateDeltasStore): * avatar * canonical_alias * guest_access + * room_type A is_federatable key can also be included with a boolean value. @@ -263,6 +264,7 @@ class StatsStore(StateDeltasStore): "avatar", "canonical_alias", "guest_access", + "room_type", ): field = fields.get(col, sentinel) if field is not sentinel and (not isinstance(field, str) or "\0" in field): @@ -572,7 +574,7 @@ class StatsStore(StateDeltasStore): state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined] - room_state = { + room_state: Dict[str, Union[None, bool, str]] = { "join_rules": None, "history_visibility": None, "encryption": None, @@ -581,6 +583,7 @@ class StatsStore(StateDeltasStore): "avatar": None, "canonical_alias": None, "is_federatable": True, + "room_type": None, } for event in state_event_map.values(): @@ -604,6 +607,9 @@ class StatsStore(StateDeltasStore): room_state["is_federatable"] = ( event.content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event.content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type await self.update_room_state(room_id, room_state) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 8e88784d3c..3a1df7776c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -46,10 +46,12 @@ from typing import ( Set, Tuple, cast, + overload, ) import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet import defer @@ -795,6 +797,24 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) return RoomStreamToken(topo, stream_ordering) + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: Literal[False] = False, + ) -> int: + ... + + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: bool = False, + ) -> Optional[int]: + ... + def get_stream_id_for_event_txn( self, txn: LoggingTransaction, diff --git a/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql new file mode 100644 index 0000000000..d5e0765471 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE room_stats_state ADD room_type TEXT; + +INSERT INTO background_updates (update_name, progress_json) + VALUES ('add_room_type_column', '{}'); diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index e430941d27..40148d503c 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -50,7 +50,7 @@ class LogContextScopeManagerTestCase(TestCase): # global variables that power opentracing. We create our own tracer instance # and test with it. - scope_manager = LogContextScopeManager({}) + scope_manager = LogContextScopeManager() config = jaeger_client.config.Config( config={}, service_name="test", scope_manager=scope_manager ) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 35c59ee9e0..1ccd96a207 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,7 +18,7 @@ """Tests REST events for /rooms paths.""" import json -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse @@ -33,7 +33,9 @@ from synapse.api.constants import ( EventContentFields, EventTypes, Membership, + PublicRoomsFilterFields, RelationTypes, + RoomTypes, ) from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus @@ -1858,6 +1860,90 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.result) +class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + + config = self.default_config() + config["allow_public_rooms_without_auth"] = True + config["experimental_features"] = {"msc3827_enabled": True} + self.hs = self.setup_test_homeserver(config=config) + self.url = b"/_matrix/client/r0/publicRooms" + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + user = self.register_user("alice", "pass") + self.token = self.login(user, "pass") + + # Create a room + self.helper.create_room_as( + user, + is_public=True, + extra_content={"visibility": "public"}, + tok=self.token, + ) + # Create a space + self.helper.create_room_as( + user, + is_public=True, + extra_content={ + "visibility": "public", + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE}, + }, + tok=self.token, + ) + + def make_public_rooms_request( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[List[Dict[str, Any]], int]: + channel = self.make_request( + "POST", + self.url, + {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}}, + self.token, + ) + chunk = channel.json_body["chunk"] + count = channel.json_body["total_room_count_estimate"] + + self.assertEqual(len(chunk), count) + + return chunk, count + + def test_returns_both_rooms_and_spaces_if_no_filter(self) -> None: + chunk, count = self.make_public_rooms_request(None) + + self.assertEqual(count, 2) + + def test_returns_only_rooms_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request([None]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), None) + + def test_returns_only_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space"]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), "m.space") + + def test_returns_both_rooms_and_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space", None]) + + self.assertEqual(count, 2) + + def test_returns_both_rooms_and_spaces_if_array_is_empty(self) -> None: + chunk, count = self.make_public_rooms_request([]) + + self.assertEqual(count, 2) + + class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): """Test that we correctly fallback to local filtering if a remote server doesn't support search. @@ -1882,7 +1968,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): "Simple test for searching rooms over federation" self.federation_client.get_public_rooms.return_value = make_awaitable({}) # type: ignore[attr-defined] - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", @@ -1911,7 +1997,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): make_awaitable({}), ) - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py index 9abd0cb446..1edb619630 100644 --- a/tests/storage/databases/main/test_room.py +++ b/tests/storage/databases/main/test_room.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json + +from synapse.api.constants import RoomTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.storage.databases.main.room import _BackgroundUpdates @@ -91,3 +94,69 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): ) ) self.assertEqual(room_creator_after, self.user_id) + + def test_background_add_room_type_column(self): + """Test that the background update to populate the `room_type` column in + `room_stats_state` works properly. + """ + + # Create a room without a type + room_id = self._generate_room() + + # Get event_id of the m.room.create event + event_id = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={ + "room_id": room_id, + "type": "m.room.create", + }, + retcol="event_id", + ) + ) + + # Fake a room creation event with a room type + event = { + "content": { + "creator": "@user:server.org", + "room_version": "9", + "type": RoomTypes.SPACE, + }, + "type": "m.room.create", + } + self.get_success( + self.store.db_pool.simple_update( + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": json.dumps(event)}, + desc="test", + ) + ) + + # Insert and run the background update + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + self.wait_for_background_updates() + + # Make sure the background update filled in the room type + room_type_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="room_stats_state", + keyvalues={"room_id": room_id}, + retcol="room_type", + allow_none=True, + ) + ) + self.assertEqual(room_type_after, RoomTypes.SPACE) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 684485ae06..8462952b8f 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -86,6 +86,8 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): event.internal_metadata.is_outlier.return_value = False event.depth = stream + self.store._events_stream_cache.entity_has_changed(room_id, stream) + self.get_success( self.store.db_pool.simple_insert( table="events", |