diff options
126 files changed, 1757 insertions, 878 deletions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 51cbeb3298..f84a4ef644 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -314,8 +314,9 @@ jobs: # There aren't wheels for some of the older deps, so we need to install # their build dependencies - run: | + sudo apt update sudo apt-get -qq install build-essential libffi-dev python-dev \ - libxml2-dev libxslt-dev xmlsec1 zlib1g-dev libjpeg-dev libwebp-dev + libxml2-dev libxslt-dev xmlsec1 zlib1g-dev libjpeg-dev libwebp-dev - uses: actions/setup-python@v4 with: diff --git a/changelog.d/10428.removal b/changelog.d/10428.removal new file mode 100644 index 0000000000..c056e89585 --- /dev/null +++ b/changelog.d/10428.removal @@ -0,0 +1 @@ +Remove the old version of the R30 (30-day retained users) phone-home metric. diff --git a/changelog.d/15464.bugfix b/changelog.d/15464.bugfix new file mode 100644 index 0000000000..3c655989b3 --- /dev/null +++ b/changelog.d/15464.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where setting the read marker could fail when using message retention. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/15537.misc b/changelog.d/15537.misc new file mode 100644 index 0000000000..979e0ba977 --- /dev/null +++ b/changelog.d/15537.misc @@ -0,0 +1 @@ +Add not null constraint to column full_user_id of tables profiles and user_filters. diff --git a/changelog.d/15599.bugfix b/changelog.d/15599.bugfix new file mode 100644 index 0000000000..b58af8ad55 --- /dev/null +++ b/changelog.d/15599.bugfix @@ -0,0 +1 @@ +Print full error and stack-trace of any exception that occurs during startup/initialization. diff --git a/changelog.d/15601.bugfix b/changelog.d/15601.bugfix new file mode 100644 index 0000000000..426db6cea3 --- /dev/null +++ b/changelog.d/15601.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the `url_preview_url_blacklist` configuration setting was not applied to oEmbed or image URLs found while previewing a URL. 
diff --git a/changelog.d/15602.misc b/changelog.d/15602.misc new file mode 100644 index 0000000000..cdd0c039bd --- /dev/null +++ b/changelog.d/15602.misc @@ -0,0 +1 @@ +Run mypy type checking with the minimum supported Python version to catch new usage that isn't backwards-compatible. diff --git a/changelog.d/15604.misc b/changelog.d/15604.misc new file mode 100644 index 0000000000..92d1d600bc --- /dev/null +++ b/changelog.d/15604.misc @@ -0,0 +1 @@ +Fix subscriptable type usage in Python <3.9. diff --git a/changelog.d/15606.misc b/changelog.d/15606.misc new file mode 100644 index 0000000000..568c0d3fc5 --- /dev/null +++ b/changelog.d/15606.misc @@ -0,0 +1 @@ +Update internal terminology. diff --git a/changelog.d/15607.bugfix b/changelog.d/15607.bugfix new file mode 100644 index 0000000000..a2767adbe2 --- /dev/null +++ b/changelog.d/15607.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where filters with multiple backslashes were rejected. diff --git a/changelog.d/15610.misc b/changelog.d/15610.misc new file mode 100644 index 0000000000..2eff30f6e3 --- /dev/null +++ b/changelog.d/15610.misc @@ -0,0 +1 @@ +Instrument `state` and `state_group` storage-related operations to better picture what's happening when tracing. diff --git a/changelog.d/15611.feature b/changelog.d/15611.feature new file mode 100644 index 0000000000..7cfb46fd0a --- /dev/null +++ b/changelog.d/15611.feature @@ -0,0 +1 @@ +Add a new admin API to create a new device for a user. diff --git a/changelog.d/15613.doc b/changelog.d/15613.doc new file mode 100644 index 0000000000..94733facf0 --- /dev/null +++ b/changelog.d/15613.doc @@ -0,0 +1 @@ +Warn users that at least 3.75GB of space is needed for the nix Synapse development environment. 
diff --git a/changelog.d/15614.bugfix b/changelog.d/15614.bugfix new file mode 100644 index 0000000000..b523ae6eb1 --- /dev/null +++ b/changelog.d/15614.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.82.0 where the error message displayed when validation of the `app_service_config_files` config option fails would be incorrectly formatted. diff --git a/changelog.d/15615.misc b/changelog.d/15615.misc new file mode 100644 index 0000000000..a39fd0a098 --- /dev/null +++ b/changelog.d/15615.misc @@ -0,0 +1 @@ +Re-type config paths in `ConfigError`s to be `StrSequence`s instead of `Iterable[str]`s. diff --git a/changelog.d/15620.misc b/changelog.d/15620.misc new file mode 100644 index 0000000000..568c0d3fc5 --- /dev/null +++ b/changelog.d/15620.misc @@ -0,0 +1 @@ +Update internal terminology. diff --git a/changelog.d/15621.misc b/changelog.d/15621.misc new file mode 100644 index 0000000000..5d060f4dbc --- /dev/null +++ b/changelog.d/15621.misc @@ -0,0 +1 @@ +Update Mutual Rooms (MSC2666) implementation to match new proposal text. \ No newline at end of file diff --git a/changelog.d/15624.bugfix b/changelog.d/15624.bugfix new file mode 100644 index 0000000000..fde515ba62 --- /dev/null +++ b/changelog.d/15624.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where deactivated users were still able to login using the custom `org.matrix.login.jwt` login type (if enabled). diff --git a/changelog.d/15625.misc b/changelog.d/15625.misc new file mode 100644 index 0000000000..7ea8cc9433 --- /dev/null +++ b/changelog.d/15625.misc @@ -0,0 +1 @@ +Remove the unstable identifiers from faster joins ([MSC3706](https://github.com/matrix-org/matrix-spec-proposals/pull/3706)). diff --git a/changelog.d/15626.misc b/changelog.d/15626.misc new file mode 100644 index 0000000000..0016cdbf10 --- /dev/null +++ b/changelog.d/15626.misc @@ -0,0 +1 @@ +Fix the olddeps CI. 
diff --git a/changelog.d/15630.misc b/changelog.d/15630.misc new file mode 100644 index 0000000000..a30304bfd6 --- /dev/null +++ b/changelog.d/15630.misc @@ -0,0 +1 @@ +Fix two memory leaks in `trial` test runs. diff --git a/changelog.d/15633.misc b/changelog.d/15633.misc new file mode 100644 index 0000000000..4126a20602 --- /dev/null +++ b/changelog.d/15633.misc @@ -0,0 +1 @@ +Trace how many new events from the backfill response we need to process. diff --git a/changelog.d/15636.misc b/changelog.d/15636.misc new file mode 100644 index 0000000000..82329c5e43 --- /dev/null +++ b/changelog.d/15636.misc @@ -0,0 +1 @@ +Remove duplicate timestamp from test logs (`_trial_temp/test.log`). diff --git a/changelog.d/15639.misc b/changelog.d/15639.misc new file mode 100644 index 0000000000..92230e206f --- /dev/null +++ b/changelog.d/15639.misc @@ -0,0 +1 @@ +Bump types-setuptools from 67.7.0.2 to 67.8.0.0. diff --git a/changelog.d/15640.misc b/changelog.d/15640.misc new file mode 100644 index 0000000000..4c2a3dbc52 --- /dev/null +++ b/changelog.d/15640.misc @@ -0,0 +1 @@ +Bump types-pillow from 9.5.0.2 to 9.5.0.4. diff --git a/changelog.d/15641.misc b/changelog.d/15641.misc new file mode 100644 index 0000000000..a85d85c58e --- /dev/null +++ b/changelog.d/15641.misc @@ -0,0 +1 @@ +Bump sphinx from 6.1.3 to 6.2.1. diff --git a/changelog.d/15642.misc b/changelog.d/15642.misc new file mode 100644 index 0000000000..5d6125140d --- /dev/null +++ b/changelog.d/15642.misc @@ -0,0 +1 @@ +Bump furo from 2023.3.27 to 2023.5.20. diff --git a/changelog.d/15643.misc b/changelog.d/15643.misc new file mode 100644 index 0000000000..5bd2e74071 --- /dev/null +++ b/changelog.d/15643.misc @@ -0,0 +1 @@ +Bump pygithub from 1.58.1 to 1.58.2. diff --git a/changelog.d/15646.misc b/changelog.d/15646.misc new file mode 100644 index 0000000000..872afe30b8 --- /dev/null +++ b/changelog.d/15646.misc @@ -0,0 +1 @@ +Limit the size of the `HomeServerConfig` cache in trial test runs. 
diff --git a/changelog.d/15648.doc b/changelog.d/15648.doc new file mode 100644 index 0000000000..70f65ebbff --- /dev/null +++ b/changelog.d/15648.doc @@ -0,0 +1 @@ +Remove outdated comment from the generated and sample homeserver log configs. \ No newline at end of file diff --git a/changelog.d/15651.misc b/changelog.d/15651.misc new file mode 100644 index 0000000000..4d7c0248b2 --- /dev/null +++ b/changelog.d/15651.misc @@ -0,0 +1 @@ +Bump requests from 2.28.2 to 2.31.0. diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 6b952ba396..229942b311 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -813,6 +813,33 @@ The following fields are returned in the JSON response body: - `total` - Total number of user's devices. +### Create a device + +Creates a new device for a specific `user_id` and `device_id`. Does nothing if the `device_id` +exists already. + +The API is: + +``` +POST /_synapse/admin/v2/users/<user_id>/devices + +{ + "device_id": "QBUAZIFURK" +} +``` + +An empty JSON dict is returned. + +**Parameters** + +The following parameters should be set in the URL: + +- `user_id` - fully qualified: for example, `@user:server.com`. + +The following fields are required in the JSON request body: + +- `device_id` - The device ID to create. + ### Delete multiple devices Deletes the given devices for a specific `user_id`, and invalidates any access token associated with them. diff --git a/docs/replication.md b/docs/replication.md index 108da9a065..25145daaf5 100644 --- a/docs/replication.md +++ b/docs/replication.md @@ -30,12 +30,6 @@ minimal. See [the TCP replication documentation](tcp_replication.md). -### The Slaved DataStore - -There are read-only version of the synapse storage layer in -`synapse/replication/slave/storage` that use the response of the -replication API to invalidate their caches. 
- ### The TCP Replication Module Information about how the tcp replication module is structured, including how the classes interact, can be found in diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml index 6339160d00..ae0318122e 100644 --- a/docs/sample_log_config.yaml +++ b/docs/sample_log_config.yaml @@ -68,9 +68,7 @@ root: # Write logs to the `buffer` handler, which will buffer them together in memory, # then write them to a file. # - # Replace "buffer" with "console" to log to stderr instead. (Note that you'll - # also need to update the configuration for the `twisted` logger above, in - # this case.) + # Replace "buffer" with "console" to log to stderr instead. # handlers: [buffer] diff --git a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md index 3a7ed7c806..60b758e33b 100644 --- a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md +++ b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md @@ -42,11 +42,6 @@ The following statistics are sent to the configured reporting endpoint: | `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. | | `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. | | `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. | -| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. | -| `r30_users_android` | int | The number of 30 day retained users, as defined above. 
Filtered only to clients with "Android" in the user agent string. | -| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. | -| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. | -| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. | | `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. | | `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. | | `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. | diff --git a/flake.nix b/flake.nix index 7351571e61..8d2bf779bd 100644 --- a/flake.nix +++ b/flake.nix @@ -1,35 +1,30 @@ -# A nix flake that sets up a complete Synapse development environment. Dependencies +# A Nix flake that sets up a complete Synapse development environment. Dependencies # for the SyTest (https://github.com/matrix-org/sytest) and Complement # (https://github.com/matrix-org/complement) Matrix homeserver test suites are also # installed automatically. # -# You must have already installed nix (https://nixos.org) on your system to use this. -# nix can be installed on Linux or MacOS; NixOS is not required. 
Windows is not -# directly supported, but nix can be installed inside of WSL2 or even Docker +# You must have already installed Nix (https://nixos.org) on your system to use this. +# Nix can be installed on Linux or MacOS; NixOS is not required. Windows is not +# directly supported, but Nix can be installed inside of WSL2 or even Docker # containers. Please refer to https://nixos.org/download for details. # # You must also enable support for flakes in Nix. See the following for how to # do so permanently: https://nixos.wiki/wiki/Flakes#Enable_flakes # +# Be warned: you'll need over 3.75 GB of free space to download all the dependencies. +# # Usage: # -# With nix installed, navigate to the directory containing this flake and run +# With Nix installed, navigate to the directory containing this flake and run # `nix develop --impure`. The `--impure` is necessary in order to store state # locally from "services", such as PostgreSQL and Redis. # # You should now be dropped into a new shell with all programs and dependencies # availabile to you! # -# You can start up pre-configured, local PostgreSQL and Redis instances by +# You can start up pre-configured local Synapse, PostgreSQL and Redis instances by # running: `devenv up`. To stop them, use Ctrl-C. # -# A PostgreSQL database called 'synapse' will be set up for you, along with -# a PostgreSQL user named 'synapse_user'. -# The 'host' can be found by running `echo $PGHOST` with the development -# shell activated. Use these values to configure your Synapse to connect -# to the local PostgreSQL database. You do not need to specify a password. -# https://matrix-org.github.io/synapse/latest/postgres -# # All state (the venv, postgres and redis data and config) are stored in # .devenv/state. Deleting a file from here and then re-entering the shell # will recreate these files from scratch. 
@@ -66,7 +61,7 @@ let pkgs = nixpkgs.legacyPackages.${system}; in { - # Everything is configured via devenv - a nix module for creating declarative + # Everything is configured via devenv - a Nix module for creating declarative # developer environments. See https://devenv.sh/reference/options/ for a list # of all possible options. default = devenv.lib.mkShell { @@ -153,11 +148,39 @@ # Redis is needed in order to run Synapse in worker mode. services.redis.enable = true; + # Configure and start Synapse. Before starting Synapse, this shell code: + # * generates a default homeserver.yaml config file if one does not exist, and + # * ensures a directory containing two additional homeserver config files exists; + # one to configure using the development environment's PostgreSQL as the + # database backend and another for enabling Redis support. + process.before = '' + python -m synapse.app.homeserver -c homeserver.yaml --generate-config --server-name=synapse.dev --report-stats=no + mkdir -p homeserver-config-overrides.d + cat > homeserver-config-overrides.d/database.yaml << EOF + ## Do not edit this file. This file is generated by flake.nix + database: + name: psycopg2 + args: + user: synapse_user + database: synapse + host: $PGHOST + cp_min: 5 + cp_max: 10 + EOF + cat > homeserver-config-overrides.d/redis.yaml << EOF + ## Do not edit this file. This file is generated by flake.nix + redis: + enabled: true + EOF + ''; + # Start synapse when `devenv up` is run. + processes.synapse.exec = "poetry run python -m synapse.app.homeserver -c homeserver.yaml --config-directory homeserver-config-overrides.d"; + # Define the perl modules we require to run SyTest. # # This list was compiled by cross-referencing https://metacpan.org/ # with the modules defined in './cpanfile' and then finding the - # corresponding nix packages on https://search.nixos.org/packages. + # corresponding Nix packages on https://search.nixos.org/packages. 
# # This was done until `./install-deps.pl --dryrun` produced no output. env.PERL5LIB = "${with pkgs.perl536Packages; makePerlPath [ diff --git a/mypy.ini b/mypy.ini index 5e7057cfb7..3363c6daee 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,9 @@ no_implicit_optional = True disallow_untyped_defs = True strict_equality = True warn_redundant_casts = True +# Run mypy type checking with the minimum supported Python version to catch new usage +# that isn't backwards-compatible (types, overloads, etc). +python_version = 3.8 files = docker/, diff --git a/poetry.lock b/poetry.lock index 48a752986d..3f8bf7c304 100644 --- a/poetry.lock +++ b/poetry.lock @@ -580,20 +580,20 @@ dev = ["Sphinx", "coverage", "flake8", "lxml", "lxml-stubs", "memory-profiler", [[package]] name = "furo" -version = "2023.3.27" +version = "2023.5.20" description = "A clean customisable Sphinx documentation theme." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "furo-2023.3.27-py3-none-any.whl", hash = "sha256:4ab2be254a2d5e52792d0ca793a12c35582dd09897228a6dd47885dabd5c9521"}, - {file = "furo-2023.3.27.tar.gz", hash = "sha256:b99e7867a5cc833b2b34d7230631dd6558c7a29f93071fdbb5709634bb33c5a5"}, + {file = "furo-2023.5.20-py3-none-any.whl", hash = "sha256:594a8436ddfe0c071f3a9e9a209c314a219d8341f3f1af33fdf7c69544fab9e6"}, + {file = "furo-2023.5.20.tar.gz", hash = "sha256:40e09fa17c6f4b22419d122e933089226dcdb59747b5b6c79363089827dea16f"}, ] [package.dependencies] beautifulsoup4 = "*" pygments = ">=2.7" -sphinx = ">=5.0,<7.0" +sphinx = ">=6.0,<8.0" sphinx-basic-ng = "*" [[package]] @@ -1940,14 +1940,14 @@ email = ["email-validator (>=1.0.3)"] [[package]] name = "pygithub" -version = "1.58.1" +version = "1.58.2" description = "Use the full Github API v3" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "PyGithub-1.58.1-py3-none-any.whl", hash = "sha256:4e7fe9c3ec30d5fde5b4fbb97f18821c9dbf372bf6df337fe66f6689a65e0a83"}, - {file = 
"PyGithub-1.58.1.tar.gz", hash = "sha256:7d528b4ad92bc13122129fafd444ce3d04c47d2d801f6446b6e6ee2d410235b3"}, + {file = "PyGithub-1.58.2-py3-none-any.whl", hash = "sha256:f435884af617c6debaa76cbc355372d1027445a56fbc39972a3b9ed4968badc8"}, + {file = "PyGithub-1.58.2.tar.gz", hash = "sha256:1e6b1b7afe31f75151fb81f7ab6b984a7188a852bdb123dbb9ae90023c3ce60f"}, ] [package.dependencies] @@ -2251,21 +2251,21 @@ md = ["cmarkgfm (>=0.8.0)"] [[package]] name = "requests" -version = "2.28.2" +version = "2.31.0" description = "Python HTTP for Humans." category = "main" optional = false -python-versions = ">=3.7, <4" +python-versions = ">=3.7" files = [ - {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, - {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] [package.dependencies] certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<3" [package.extras] socks = ["PySocks (>=1.5.6,!=1.5.7)"] @@ -2565,21 +2565,21 @@ files = [ [[package]] name = "sphinx" -version = "6.1.3" +version = "6.2.1" description = "Python documentation generator" category = "dev" optional = false python-versions = ">=3.8" files = [ - {file = "Sphinx-6.1.3.tar.gz", hash = "sha256:0dac3b698538ffef41716cf97ba26c1c7788dba73ce6f150c1ff5b4720786dd2"}, - {file = "sphinx-6.1.3-py3-none-any.whl", hash = "sha256:807d1cb3d6be87eb78a381c3e70ebd8d346b9a25f3753e9947e866b2786865fc"}, + {file = "Sphinx-6.2.1.tar.gz", hash = "sha256:6d56a34697bb749ffa0152feafc4b19836c755d90a7c59b72bc7dfd371b9cc6b"}, + {file = "sphinx-6.2.1-py3-none-any.whl", hash = 
"sha256:97787ff1fa3256a3eef9eda523a63dbf299f7b47e053cfcf684a1c2a8380c912"}, ] [package.dependencies] alabaster = ">=0.7,<0.8" babel = ">=2.9" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -docutils = ">=0.18,<0.20" +docutils = ">=0.18.1,<0.20" imagesize = ">=1.3" importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""} Jinja2 = ">=3.0" @@ -2597,7 +2597,7 @@ sphinxcontrib-serializinghtml = ">=1.1.5" [package.extras] docs = ["sphinxcontrib-websupport"] lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] -test = ["cython", "html5lib", "pytest (>=4.6)"] +test = ["cython", "filelock", "html5lib", "pytest (>=4.6)"] [[package]] name = "sphinx-autodoc2" @@ -3058,14 +3058,14 @@ files = [ [[package]] name = "types-pillow" -version = "9.5.0.2" +version = "9.5.0.4" description = "Typing stubs for Pillow" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-Pillow-9.5.0.2.tar.gz", hash = "sha256:b3f9f621f259566c19c1deca21901017c8b1e3e200ed2e49e0a2d83c0a5175db"}, - {file = "types_Pillow-9.5.0.2-py3-none-any.whl", hash = "sha256:58fdebd0ffa2353ecccdd622adde23bce89da5c0c8b96c34f2d1eca7b7e42d0e"}, + {file = "types-Pillow-9.5.0.4.tar.gz", hash = "sha256:f1b6af47abd151847ee25911ffeba784899bc7dc7f9eba8ca6a5aac522b012ef"}, + {file = "types_Pillow-9.5.0.4-py3-none-any.whl", hash = "sha256:69427d9fa4320ff6e30f00fb9c0dd71185dc0a16de4757774220104759483466"}, ] [[package]] @@ -3124,14 +3124,14 @@ types-urllib3 = "*" [[package]] name = "types-setuptools" -version = "67.7.0.2" +version = "67.8.0.0" description = "Typing stubs for setuptools" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-setuptools-67.7.0.2.tar.gz", hash = "sha256:155789e85e79d5682b0d341919d4beb6140408ae52bac922af25b54e36ab25c0"}, - {file = "types_setuptools-67.7.0.2-py3-none-any.whl", hash = 
"sha256:bd30f6dbe9b83f0a7e6e3eab6d2df748aa4f55700d54e9f077d3aa30cc019445"}, + {file = "types-setuptools-67.8.0.0.tar.gz", hash = "sha256:95c9ed61871d6c0e258433373a4e1753c0a7c3627a46f4d4058c7b5a08ab844f"}, + {file = "types_setuptools-67.8.0.0-py3-none-any.whl", hash = "sha256:6df73340d96b238a4188b7b7668814b37e8018168aef1eef94a3b1872e3f60ff"}, ] [[package]] diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index de7c56bc0f..82aeef8d19 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -128,20 +128,7 @@ USER_FILTER_SCHEMA = { "account_data": {"$ref": "#/definitions/filter"}, "room": {"$ref": "#/definitions/room_filter"}, "event_format": {"type": "string", "enum": ["client", "federation"]}, - "event_fields": { - "type": "array", - "items": { - "type": "string", - # Don't allow '\\' in event field filters. This makes matching - # events a lot easier as we can then use a negative lookbehind - # assertion to split '\.' If we allowed \\ then it would - # incorrectly split '\\.' See synapse.events.utils.serialize_event - # - # Note that because this is a regular expression, we have to escape - # each backslash in the pattern. 
- "pattern": r"^((?!\\\\).)*$", - }, - }, + "event_fields": {"type": "array", "items": {"type": "string"}}, }, "additionalProperties": True, # Allow new fields for forward compatibility } diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index b05fe2c589..f9aada269a 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -64,7 +64,7 @@ from synapse.util.logcontext import LoggingContext logger = logging.getLogger("synapse.app.admin_cmd") -class AdminCmdSlavedStore( +class AdminCmdStore( FilteringWorkerStore, ClientIpWorkerStore, DeviceWorkerStore, @@ -103,7 +103,7 @@ class AdminCmdSlavedStore( class AdminCmdServer(HomeServer): - DATASTORE_CLASS = AdminCmdSlavedStore # type: ignore + DATASTORE_CLASS = AdminCmdStore # type: ignore async def export_data_command(hs: HomeServer, args: argparse.Namespace) -> None: diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index e17ce35b8e..909ebccf78 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -102,7 +102,7 @@ from synapse.util.httpresourcetree import create_resource_tree logger = logging.getLogger("synapse.app.generic_worker") -class GenericWorkerSlavedStore( +class GenericWorkerStore( # FIXME(#3714): We need to add UserDirectoryStore as we write directly # rather than going via the correct worker. 
UserDirectoryStore, @@ -154,7 +154,7 @@ class GenericWorkerSlavedStore( class GenericWorkerServer(HomeServer): - DATASTORE_CLASS = GenericWorkerSlavedStore # type: ignore + DATASTORE_CLASS = GenericWorkerStore # type: ignore def _listen_http(self, listener_config: ListenerConfig) -> None: assert listener_config.http_options is not None diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 897dd3edac..09988670da 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -127,10 +127,6 @@ async def phone_stats_home( daily_sent_messages = await store.count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages - r30_results = await store.count_r30_users() - for name, count in r30_results.items(): - stats["r30_users_" + name] = count - r30v2_results = await store.count_r30v2_users() for name, count in r30v2_results.items(): stats["r30v2_users_" + name] = count diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 2ce60610ca..1d268a1817 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -44,6 +44,7 @@ import jinja2 import pkg_resources import yaml +from synapse.types import StrSequence from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter logger = logging.getLogger(__name__) @@ -58,7 +59,7 @@ class ConfigError(Exception): the problem lies. 
""" - def __init__(self, msg: str, path: Optional[Iterable[str]] = None): + def __init__(self, msg: str, path: Optional[StrSequence] = None): self.msg = msg self.path = path diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index b5cec132b4..fc51aed234 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -61,9 +61,10 @@ from synapse.config import ( # noqa: F401 voip, workers, ) +from synapse.types import StrSequence class ConfigError(Exception): - def __init__(self, msg: str, path: Optional[Iterable[str]] = None): + def __init__(self, msg: str, path: Optional[StrSequence] = None): self.msg = msg self.path = path diff --git a/synapse/config/_util.py b/synapse/config/_util.py index dfc5d12210..acccca413b 100644 --- a/synapse/config/_util.py +++ b/synapse/config/_util.py @@ -11,17 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Dict, Iterable, Type, TypeVar +from typing import Any, Dict, Type, TypeVar import jsonschema from pydantic import BaseModel, ValidationError, parse_obj_as from synapse.config._base import ConfigError -from synapse.types import JsonDict +from synapse.types import JsonDict, StrSequence def validate_config( - json_schema: JsonDict, config: Any, config_path: Iterable[str] + json_schema: JsonDict, config: Any, config_path: StrSequence ) -> None: """Validates a config setting against a JsonSchema definition @@ -45,7 +45,7 @@ def validate_config( def json_error_to_config_error( - e: jsonschema.ValidationError, config_path: Iterable[str] + e: jsonschema.ValidationError, config_path: StrSequence ) -> ConfigError: """Converts a json validation error to a user-readable ConfigError diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index fd89960e72..c2710fdf04 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -36,11 +36,10 @@ class AppServiceConfig(Config): if not isinstance(self.app_service_config_files, list) or not all( type(x) is str for x in self.app_service_config_files ): - # type-ignore: this function gets arbitrary json value; we do use this path. raise ConfigError( "Expected '%s' to be a list of AS config files:" % (self.app_service_config_files), - "app_service_config_files", + ("app_service_config_files",), ) self.track_appservice_user_ips = config.get("track_appservice_user_ips", False) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 6e453bd963..d769b7f668 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -84,18 +84,6 @@ class ExperimentalConfig(Config): "msc3984_appservice_key_query", False ) - # MSC3706 (server-side support for partial state in /send_join responses) - # Synapse will always serve partial state responses to requests using the stable - # query parameter `omit_members`. 
If this flag is set, Synapse will also serve - # partial state responses to requests using the unstable query parameter - # `org.matrix.msc3706.partial_state`. - self.msc3706_enabled: bool = experimental.get("msc3706_enabled", False) - - # experimental support for faster joins over federation - # (MSC2775, MSC3706, MSC3895) - # requires a target server that can provide a partial join response (MSC3706) - self.faster_joins_enabled: bool = experimental.get("faster_joins", True) - # MSC3720 (Account status endpoint) self.msc3720_enabled: bool = experimental.get("msc3720_enabled", False) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 56db875b25..1e080133dc 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -117,9 +117,7 @@ root: # Write logs to the `buffer` handler, which will buffer them together in memory, # then write them to a file. # - # Replace "buffer" with "console" to log to stderr instead. (Note that you'll - # also need to update the configuration for the `twisted` logger above, in - # this case.) + # Replace "buffer" with "console" to log to stderr instead. # handlers: [buffer] diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py index 0d32aba70a..d7959639ee 100644 --- a/synapse/config/oembed.py +++ b/synapse/config/oembed.py @@ -19,7 +19,7 @@ from urllib import parse as urlparse import attr import pkg_resources -from synapse.types import JsonDict +from synapse.types import JsonDict, StrSequence from ._base import Config, ConfigError from ._util import validate_config @@ -80,7 +80,7 @@ class OembedConfig(Config): ) def _parse_and_validate_provider( - self, providers: List[JsonDict], config_path: Iterable[str] + self, providers: List[JsonDict], config_path: StrSequence ) -> Iterable[OEmbedEndpointConfig]: # Ensure it is the proper form. 
validate_config( @@ -112,7 +112,7 @@ class OembedConfig(Config): api_endpoint, patterns, endpoint.get("formats") ) - def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern: + def _glob_to_pattern(self, glob: str, config_path: StrSequence) -> Pattern: """ Convert the glob into a sane regular expression to match against. The rules followed will be slightly different for the domain portion vs. diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 655f06505b..f6cfdd3e04 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -224,20 +224,20 @@ class ContentRepositoryConfig(Config): if "http" in proxy_env or "https" in proxy_env: logger.warning("".join(HTTP_PROXY_SET_WARNING)) - # we always blacklist '0.0.0.0' and '::', which are supposed to be + # we always block '0.0.0.0' and '::', which are supposed to be # unroutable addresses. - self.url_preview_ip_range_blacklist = generate_ip_set( + self.url_preview_ip_range_blocklist = generate_ip_set( config["url_preview_ip_range_blacklist"], ["0.0.0.0", "::"], config_path=("url_preview_ip_range_blacklist",), ) - self.url_preview_ip_range_whitelist = generate_ip_set( + self.url_preview_ip_range_allowlist = generate_ip_set( config.get("url_preview_ip_range_whitelist", ()), config_path=("url_preview_ip_range_whitelist",), ) - self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ()) + self.url_preview_url_blocklist = config.get("url_preview_url_blacklist", ()) self.url_preview_accept_language = config.get( "url_preview_accept_language" diff --git a/synapse/config/server.py b/synapse/config/server.py index 386c3194b8..b46fa51593 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -27,7 +27,7 @@ from netaddr import AddrFormatError, IPNetwork, IPSet from twisted.conch.ssh.keys import Key from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.types import JsonDict +from synapse.types import JsonDict, 
StrSequence from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_server_name @@ -73,7 +73,7 @@ def _6to4(network: IPNetwork) -> IPNetwork: def generate_ip_set( ip_addresses: Optional[Iterable[str]], extra_addresses: Optional[Iterable[str]] = None, - config_path: Optional[Iterable[str]] = None, + config_path: Optional[StrSequence] = None, ) -> IPSet: """ Generate an IPSet from a list of IP addresses or CIDRs. @@ -115,7 +115,7 @@ def generate_ip_set( # IP ranges that are considered private / unroutable / don't make sense. -DEFAULT_IP_RANGE_BLACKLIST = [ +DEFAULT_IP_RANGE_BLOCKLIST = [ # Localhost "127.0.0.0/8", # Private networks. @@ -501,36 +501,36 @@ class ServerConfig(Config): # due to resource constraints self.admin_contact = config.get("admin_contact", None) - ip_range_blacklist = config.get( - "ip_range_blacklist", DEFAULT_IP_RANGE_BLACKLIST + ip_range_blocklist = config.get( + "ip_range_blacklist", DEFAULT_IP_RANGE_BLOCKLIST ) # Attempt to create an IPSet from the given ranges - # Always blacklist 0.0.0.0, :: - self.ip_range_blacklist = generate_ip_set( - ip_range_blacklist, ["0.0.0.0", "::"], config_path=("ip_range_blacklist",) + # Always block 0.0.0.0, :: + self.ip_range_blocklist = generate_ip_set( + ip_range_blocklist, ["0.0.0.0", "::"], config_path=("ip_range_blacklist",) ) - self.ip_range_whitelist = generate_ip_set( + self.ip_range_allowlist = generate_ip_set( config.get("ip_range_whitelist", ()), config_path=("ip_range_whitelist",) ) # The federation_ip_range_blacklist is used for backwards-compatibility # and only applies to federation and identity servers. 
if "federation_ip_range_blacklist" in config: - # Always blacklist 0.0.0.0, :: - self.federation_ip_range_blacklist = generate_ip_set( + # Always block 0.0.0.0, :: + self.federation_ip_range_blocklist = generate_ip_set( config["federation_ip_range_blacklist"], ["0.0.0.0", "::"], config_path=("federation_ip_range_blacklist",), ) # 'federation_ip_range_whitelist' was never a supported configuration option. - self.federation_ip_range_whitelist = None + self.federation_ip_range_allowlist = None else: # No backwards-compatiblity requrired, as federation_ip_range_blacklist # is not given. Default to ip_range_blacklist and ip_range_whitelist. - self.federation_ip_range_blacklist = self.ip_range_blacklist - self.federation_ip_range_whitelist = self.ip_range_whitelist + self.federation_ip_range_blocklist = self.ip_range_blocklist + self.federation_ip_range_allowlist = self.ip_range_allowlist # (undocumented) option for torturing the worker-mode replication a bit, # for testing. The value defines the number of milliseconds to pause before diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 9b4d692cf4..e7e8225b8e 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -19,6 +19,7 @@ from immutabledict import immutabledict from synapse.appservice import ApplicationService from synapse.events import EventBase +from synapse.logging.opentracing import tag_args, trace from synapse.types import JsonDict, StateMap if TYPE_CHECKING: @@ -242,6 +243,8 @@ class EventContext(UnpersistedEventContextBase): return self._state_group + @trace + @tag_args async def get_current_state_ids( self, state_filter: Optional["StateFilter"] = None ) -> Optional[StateMap[str]]: @@ -275,6 +278,8 @@ class EventContext(UnpersistedEventContextBase): return prev_state_ids + @trace + @tag_args async def get_prev_state_ids( self, state_filter: Optional["StateFilter"] = None ) -> StateMap[str]: diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 
e6d040176b..e7b7b78b84 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -22,6 +22,7 @@ from typing import ( Iterable, List, Mapping, + Match, MutableMapping, Optional, Union, @@ -46,12 +47,10 @@ if TYPE_CHECKING: from synapse.handlers.relations import BundledAggregations -# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' -# (?<!stuff) matches if the current position in the string is not preceded -# by a match for 'stuff'. -# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as -# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar" -SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.") +# Split strings on "." but not "\." (or "\\\."). +SPLIT_FIELD_REGEX = re.compile(r"\\*\.") +# Find escaped characters, e.g. those with a \ in front of them. +ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)") CANONICALJSON_MAX_INT = (2**53) - 1 CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT @@ -253,6 +252,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None: sub_out_dict[key_to_move] = sub_dict[key_to_move] +def _escape_slash(m: Match[str]) -> str: + """ + Replacement function; replace a backslash-backslash or backslash-dot with the + second character. Leaves any other string alone. + """ + if m.group(1) in ("\\", "."): + return m.group(1) + return m.group(0) + + +def _split_field(field: str) -> List[str]: + """ + Splits strings on unescaped dots and removes escaping. + + Args: + field: A string representing a path to a field. + + Returns: + A list of nested fields to traverse. + """ + + # Convert the field and remove escaping: + # + # 1. "content.body.thing\.with\.dots" + # 2. ["content", "body", "thing\.with\.dots"] + # 3. ["content", "body", "thing.with.dots"] + + # Find all dots (and their preceding backslashes). If the dot is unescaped + # then emit a new field part. 
+ result = [] + prev_start = 0 + for match in SPLIT_FIELD_REGEX.finditer(field): + # If the match is an *even* number of characters than the dot was escaped. + if len(match.group()) % 2 == 0: + continue + + # Add a new part (up to the dot, exclusive) after escaping. + result.append( + ESCAPE_SEQUENCE_PATTERN.sub( + _escape_slash, field[prev_start : match.end() - 1] + ) + ) + prev_start = match.end() + + # Add any part of the field after the last unescaped dot. (Note that if the + # character is a dot this correctly adds a blank string.) + result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:])) + + return result + + def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: """Return a new dict with only the fields in 'dictionary' which are present in 'fields'. @@ -260,7 +310,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: If there are no event fields specified then all fields are included. The entries may include '.' characters to indicate sub-fields. So ['content.body'] will include the 'body' field of the 'content' object. - A literal '.' character in a field name may be escaped using a '\'. + A literal '.' or '\' character in a field name may be escaped using a '\'. Args: dictionary: The dictionary to read from. @@ -275,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: # for each field, convert it: # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]] - split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields] - - # for each element of the output array of arrays: - # remove escaping so we can use the right key names. 
- split_fields[:] = [ - [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields - ] + split_fields = [_split_field(f) for f in fields] output: JsonDict = {} for field_array in split_fields: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index c590d8f96f..f4ca70a698 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -739,12 +739,10 @@ class FederationServer(FederationBase): "event": event_json, "state": [p.get_pdu_json(time_now) for p in state_events], "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain_events], - "org.matrix.msc3706.partial_state": caller_supports_partial_state, "members_omitted": caller_supports_partial_state, } if servers_in_room is not None: - resp["org.matrix.msc3706.servers_in_room"] = list(servers_in_room) resp["servers_in_room"] = list(servers_in_room) return resp diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index d2fa9976da..1cfc4446c4 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -59,7 +59,6 @@ class TransportLayerClient: def __init__(self, hs: "HomeServer"): self.client = hs.get_federation_http_client() - self._faster_joins_enabled = hs.config.experimental.faster_joins_enabled self._is_mine_server_name = hs.is_mine_server_name async def get_room_state_ids( @@ -363,12 +362,8 @@ class TransportLayerClient: ) -> "SendJoinResponse": path = _create_v2_path("/send_join/%s/%s", room_id, event_id) query_params: Dict[str, str] = {} - if self._faster_joins_enabled: - # lazy-load state on join - query_params["org.matrix.msc3706.partial_state"] = ( - "true" if omit_members else "false" - ) - query_params["omit_members"] = "true" if omit_members else "false" + # lazy-load state on join + query_params["omit_members"] = "true" if omit_members else "false" return await self.client.put_json( destination=destination, @@ -902,9 
+897,7 @@ def _members_omitted_parser(response: SendJoinResponse) -> Generator[None, Any, while True: val = yield if not isinstance(val, bool): - raise TypeError( - "members_omitted (formerly org.matrix.msc370c.partial_state) must be a boolean" - ) + raise TypeError("members_omitted must be a boolean") response.members_omitted = val @@ -967,27 +960,11 @@ class SendJoinParser(ByteParser[SendJoinResponse]): self._coros.append( ijson.items_coro( _members_omitted_parser(self._response), - "org.matrix.msc3706.partial_state", - use_float="True", - ) - ) - # The stable field name comes last, so it "wins" if the fields disagree - self._coros.append( - ijson.items_coro( - _members_omitted_parser(self._response), "members_omitted", use_float="True", ) ) - self._coros.append( - ijson.items_coro( - _servers_in_room_parser(self._response), - "org.matrix.msc3706.servers_in_room", - use_float="True", - ) - ) - # Again, stable field name comes last self._coros.append( ijson.items_coro( diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 36b0362504..3a744e25be 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -440,7 +440,6 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet): server_name: str, ): super().__init__(hs, authenticator, ratelimiter, server_name) - self._read_msc3706_query_param = hs.config.experimental.msc3706_enabled async def on_PUT( self, @@ -453,16 +452,7 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet): # TODO(paul): assert that event_id parsed from path actually # match those given in content - partial_state = False - # The stable query parameter wins, if it disagrees with the unstable - # parameter for some reason. 
- stable_param = parse_boolean_from_args(query, "omit_members", default=None) - if stable_param is not None: - partial_state = stable_param - elif self._read_msc3706_query_param: - partial_state = parse_boolean_from_args( - query, "org.matrix.msc3706.partial_state", default=False - ) + partial_state = parse_boolean_from_args(query, "omit_members", default=False) result = await self.handler.on_send_join_request( origin, content, room_id, caller_supports_partial_state=partial_state diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 19dec4812f..2eb28d55ac 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -148,7 +148,7 @@ class FederationHandler: self._event_auth_handler = hs.get_event_auth_handler() self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self.config = hs.config - self.http_client = hs.get_proxied_blacklisted_http_client() + self.http_client = hs.get_proxied_blocklisted_http_client() self._replication = hs.get_replication_data_handler() self._federation_event_handler = hs.get_federation_event_handler() self._device_handler = hs.get_device_handler() diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 06343d40e4..9a08618da5 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -890,6 +890,11 @@ class FederationEventHandler: # Continue on with the events that are new to us. new_events.append(event) + set_tag( + SynapseTags.RESULT_PREFIX + "new_events.length", + str(len(new_events)), + ) + # We want to sort these by depth so we process them and # tell clients about them in order. 
sorted_events = sorted(new_events, key=lambda x: x.depth) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index bf0f7acf80..3031384d25 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -52,10 +52,10 @@ class IdentityHandler: # An HTTP client for contacting trusted URLs. self.http_client = SimpleHttpClient(hs) # An HTTP client for contacting identity servers specified by clients. - self.blacklisting_http_client = SimpleHttpClient( + self._http_client = SimpleHttpClient( hs, - ip_blacklist=hs.config.server.federation_ip_range_blacklist, - ip_whitelist=hs.config.server.federation_ip_range_whitelist, + ip_blocklist=hs.config.server.federation_ip_range_blocklist, + ip_allowlist=hs.config.server.federation_ip_range_allowlist, ) self.federation_http_client = hs.get_federation_http_client() self.hs = hs @@ -197,7 +197,7 @@ class IdentityHandler: try: # Use the blacklisting http client as this call is only to identity servers # provided by a client - data = await self.blacklisting_http_client.post_json_get_json( + data = await self._http_client.post_json_get_json( bind_url, bind_data, headers=headers ) @@ -308,9 +308,7 @@ class IdentityHandler: try: # Use the blacklisting http client as this call is only to identity servers # provided by a client - await self.blacklisting_http_client.post_json_get_json( - url, content, headers - ) + await self._http_client.post_json_get_json(url, content, headers) changed = True except HttpResponseException as e: changed = False @@ -579,7 +577,7 @@ class IdentityHandler: """ # Check what hashing details are supported by this identity server try: - hash_details = await self.blacklisting_http_client.get_json( + hash_details = await self._http_client.get_json( "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), {"access_token": id_access_token}, ) @@ -646,7 +644,7 @@ class IdentityHandler: headers = {"Authorization": create_id_access_token_header(id_access_token)} try: 
- lookup_results = await self.blacklisting_http_client.post_json_get_json( + lookup_results = await self._http_client.post_json_get_json( "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), { "addresses": [lookup_value], @@ -752,7 +750,7 @@ class IdentityHandler: url = "%s%s/_matrix/identity/v2/store-invite" % (id_server_scheme, id_server) try: - data = await self.blacklisting_http_client.post_json_get_json( + data = await self._http_client.post_json_get_json( url, invite_config, {"Authorization": create_id_access_token_header(id_access_token)}, diff --git a/synapse/handlers/jwt.py b/synapse/handlers/jwt.py new file mode 100644 index 0000000000..5fddc0e315 --- /dev/null +++ b/synapse/handlers/jwt.py @@ -0,0 +1,118 @@ +# Copyright 2023 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import TYPE_CHECKING + +from authlib.jose import JsonWebToken, JWTClaims +from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError + +from synapse.api.errors import Codes, LoginError, StoreError, UserDeactivatedError +from synapse.types import JsonDict, UserID + +if TYPE_CHECKING: + from synapse.server import HomeServer + + +class JwtHandler: + def __init__(self, hs: "HomeServer"): + self.hs = hs + self._main_store = hs.get_datastores().main + + self.jwt_secret = hs.config.jwt.jwt_secret + self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim + self.jwt_algorithm = hs.config.jwt.jwt_algorithm + self.jwt_issuer = hs.config.jwt.jwt_issuer + self.jwt_audiences = hs.config.jwt.jwt_audiences + + async def validate_login(self, login_submission: JsonDict) -> str: + """ + Authenticates the user for the /login API + + Args: + login_submission: the whole of the login submission + (including 'type' and other relevant fields) + + Returns: + The user ID that is logging in. + + Raises: + LoginError if there was an authentication problem. + """ + token = login_submission.get("token", None) + if token is None: + raise LoginError( + 403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN + ) + + jwt = JsonWebToken([self.jwt_algorithm]) + claim_options = {} + if self.jwt_issuer is not None: + claim_options["iss"] = {"value": self.jwt_issuer, "essential": True} + if self.jwt_audiences is not None: + claim_options["aud"] = {"values": self.jwt_audiences, "essential": True} + + try: + claims = jwt.decode( + token, + key=self.jwt_secret, + claims_cls=JWTClaims, + claims_options=claim_options, + ) + except BadSignatureError: + # We handle this case separately to provide a better error message + raise LoginError( + 403, + "JWT validation failed: Signature verification failed", + errcode=Codes.FORBIDDEN, + ) + except JoseError as e: + # A JWT error occurred, return some info back to the client. 
+ raise LoginError( + 403, + "JWT validation failed: %s" % (str(e),), + errcode=Codes.FORBIDDEN, + ) + + try: + claims.validate(leeway=120) # allows 2 min of clock skew + + # Enforce the old behavior which is rolled out in productive + # servers: if the JWT contains an 'aud' claim but none is + # configured, the login attempt will fail + if claims.get("aud") is not None: + if self.jwt_audiences is None or len(self.jwt_audiences) == 0: + raise InvalidClaimError("aud") + except JoseError as e: + raise LoginError( + 403, + "JWT validation failed: %s" % (str(e),), + errcode=Codes.FORBIDDEN, + ) + + user = claims.get(self.jwt_subject_claim, None) + if user is None: + raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) + + user_id = UserID(user, self.hs.hostname).to_string() + + # If the account has been deactivated, do not proceed with the login + # flow. + try: + deactivated = await self._main_store.get_user_deactivated_status(user_id) + except StoreError: + # JWT lazily creates users, so they may not exist in the database yet. 
+ deactivated = False + if deactivated: + raise UserDeactivatedError("This account has been deactivated") + + return user_id diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py index 6d35e61880..49a497a860 100644 --- a/synapse/handlers/read_marker.py +++ b/synapse/handlers/read_marker.py @@ -16,6 +16,7 @@ import logging from typing import TYPE_CHECKING from synapse.api.constants import ReceiptTypes +from synapse.api.errors import SynapseError from synapse.util.async_helpers import Linearizer if TYPE_CHECKING: @@ -47,12 +48,21 @@ class ReadMarkerHandler: ) should_update = True + # Get event ordering, this also ensures we know about the event + event_ordering = await self.store.get_event_ordering(event_id) if existing_read_marker: - # Only update if the new marker is ahead in the stream - should_update = await self.store.is_event_after( - event_id, existing_read_marker["event_id"] - ) + try: + old_event_ordering = await self.store.get_event_ordering( + existing_read_marker["event_id"] + ) + except SynapseError: + # Old event no longer exists, assume new is ahead. This may + # happen if the old event was removed due to retention. + pass + else: + # Only update if the new marker is ahead in the stream + should_update = event_ordering > old_event_ordering if should_update: content = {"event_id": event_id} diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 25fd2eb3a1..c3a51722bd 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -204,7 +204,7 @@ class SsoHandler: self._media_repo = ( hs.get_media_repository() if hs.config.media.can_load_media_repo else None ) - self._http_client = hs.get_proxied_blacklisted_http_client() + self._http_client = hs.get_proxied_blocklisted_http_client() # The following template is shown after a successful user interactive # authentication session. It tells the user they can close the window. 
diff --git a/synapse/http/client.py b/synapse/http/client.py index c9479c81ff..f1ab7a8bc9 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -117,22 +117,22 @@ RawHeaderValue = Union[ ] -def check_against_blacklist( - ip_address: IPAddress, ip_whitelist: Optional[IPSet], ip_blacklist: IPSet +def _is_ip_blocked( + ip_address: IPAddress, allowlist: Optional[IPSet], blocklist: IPSet ) -> bool: """ Compares an IP address to allowed and disallowed IP sets. Args: ip_address: The IP address to check - ip_whitelist: Allowed IP addresses. - ip_blacklist: Disallowed IP addresses. + allowlist: Allowed IP addresses. + blocklist: Disallowed IP addresses. Returns: - True if the IP address is in the blacklist and not in the whitelist. + True if the IP address is in the blocklist and not in the allowlist. """ - if ip_address in ip_blacklist: - if ip_whitelist is None or ip_address not in ip_whitelist: + if ip_address in blocklist: + if allowlist is None or ip_address not in allowlist: return True return False @@ -154,27 +154,27 @@ def _make_scheduler( return _scheduler -class _IPBlacklistingResolver: +class _IPBlockingResolver: """ - A proxy for reactor.nameResolver which only produces non-blacklisted IP - addresses, preventing DNS rebinding attacks on URL preview. + A proxy for reactor.nameResolver which only produces non-blocklisted IP + addresses, preventing DNS rebinding attacks. """ def __init__( self, reactor: IReactorPluggableNameResolver, - ip_whitelist: Optional[IPSet], - ip_blacklist: IPSet, + ip_allowlist: Optional[IPSet], + ip_blocklist: IPSet, ): """ Args: reactor: The twisted reactor. - ip_whitelist: IP addresses to allow. - ip_blacklist: IP addresses to disallow. + ip_allowlist: IP addresses to allow. + ip_blocklist: IP addresses to disallow. 
""" self._reactor = reactor - self._ip_whitelist = ip_whitelist - self._ip_blacklist = ip_blacklist + self._ip_allowlist = ip_allowlist + self._ip_blocklist = ip_blocklist def resolveHostName( self, recv: IResolutionReceiver, hostname: str, portNumber: int = 0 @@ -191,16 +191,13 @@ class _IPBlacklistingResolver: ip_address = IPAddress(address.host) - if check_against_blacklist( - ip_address, self._ip_whitelist, self._ip_blacklist - ): + if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist): logger.info( - "Dropped %s from DNS resolution to %s due to blacklist" - % (ip_address, hostname) + "Blocked %s from DNS resolution to %s" % (ip_address, hostname) ) has_bad_ip = True - # if we have a blacklisted IP, we'd like to raise an error to block the + # if we have a blocked IP, we'd like to raise an error to block the # request, but all we can really do from here is claim that there were no # valid results. if not has_bad_ip: @@ -232,24 +229,24 @@ class _IPBlacklistingResolver: # ISynapseReactor implies IReactorCore, but explicitly marking it this as an implementer # of IReactorCore seems to keep mypy-zope happier. @implementer(IReactorCore, ISynapseReactor) -class BlacklistingReactorWrapper: +class BlocklistingReactorWrapper: """ - A Reactor wrapper which will prevent DNS resolution to blacklisted IP + A Reactor wrapper which will prevent DNS resolution to blocked IP addresses, to prevent DNS rebinding. """ def __init__( self, reactor: IReactorPluggableNameResolver, - ip_whitelist: Optional[IPSet], - ip_blacklist: IPSet, + ip_allowlist: Optional[IPSet], + ip_blocklist: IPSet, ): self._reactor = reactor - # We need to use a DNS resolver which filters out blacklisted IP + # We need to use a DNS resolver which filters out blocked IP # addresses, to prevent DNS rebinding. 
- self._nameResolver = _IPBlacklistingResolver( - self._reactor, ip_whitelist, ip_blacklist + self._nameResolver = _IPBlockingResolver( + self._reactor, ip_allowlist, ip_blocklist ) def __getattr__(self, attr: str) -> Any: @@ -260,7 +257,7 @@ class BlacklistingReactorWrapper: return getattr(self._reactor, attr) -class BlacklistingAgentWrapper(Agent): +class BlocklistingAgentWrapper(Agent): """ An Agent wrapper which will prevent access to IP addresses being accessed directly (without an IP address lookup). @@ -269,18 +266,18 @@ class BlacklistingAgentWrapper(Agent): def __init__( self, agent: IAgent, - ip_blacklist: IPSet, - ip_whitelist: Optional[IPSet] = None, + ip_blocklist: IPSet, + ip_allowlist: Optional[IPSet] = None, ): """ Args: agent: The Agent to wrap. - ip_whitelist: IP addresses to allow. - ip_blacklist: IP addresses to disallow. + ip_allowlist: IP addresses to allow. + ip_blocklist: IP addresses to disallow. """ self._agent = agent - self._ip_whitelist = ip_whitelist - self._ip_blacklist = ip_blacklist + self._ip_allowlist = ip_allowlist + self._ip_blocklist = ip_blocklist def request( self, @@ -299,13 +296,9 @@ class BlacklistingAgentWrapper(Agent): # Not an IP pass else: - if check_against_blacklist( - ip_address, self._ip_whitelist, self._ip_blacklist - ): - logger.info("Blocking access to %s due to blacklist" % (ip_address,)) - e = SynapseError( - HTTPStatus.FORBIDDEN, "IP address blocked by IP blacklist entry" - ) + if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist): + logger.info("Blocking access to %s" % (ip_address,)) + e = SynapseError(HTTPStatus.FORBIDDEN, "IP address blocked") return defer.fail(Failure(e)) return self._agent.request( @@ -763,10 +756,9 @@ class SimpleHttpClient(BaseHttpClient): Args: hs: The HomeServer instance to pass in treq_args: Extra keyword arguments to be given to treq.request. - ip_blacklist: The IP addresses that are blacklisted that - we may not request. 
- ip_whitelist: The whitelisted IP addresses, that we can - request if it were otherwise caught in a blacklist. + ip_blocklist: The IP addresses that we may not request. + ip_allowlist: The allowed IP addresses, that we can + request if it were otherwise caught in a blocklist. use_proxy: Whether proxy settings should be discovered and used from conventional environment variables. """ @@ -775,19 +767,19 @@ class SimpleHttpClient(BaseHttpClient): self, hs: "HomeServer", treq_args: Optional[Dict[str, Any]] = None, - ip_whitelist: Optional[IPSet] = None, - ip_blacklist: Optional[IPSet] = None, + ip_allowlist: Optional[IPSet] = None, + ip_blocklist: Optional[IPSet] = None, use_proxy: bool = False, ): super().__init__(hs, treq_args=treq_args) - self._ip_whitelist = ip_whitelist - self._ip_blacklist = ip_blacklist - - if self._ip_blacklist: - # If we have an IP blacklist, we need to use a DNS resolver which - # filters out blacklisted IP addresses, to prevent DNS rebinding. - self.reactor: ISynapseReactor = BlacklistingReactorWrapper( - self.reactor, self._ip_whitelist, self._ip_blacklist + self._ip_allowlist = ip_allowlist + self._ip_blocklist = ip_blocklist + + if self._ip_blocklist: + # If we have an IP blocklist, we need to use a DNS resolver which + # filters out blocked IP addresses, to prevent DNS rebinding. + self.reactor: ISynapseReactor = BlocklistingReactorWrapper( + self.reactor, self._ip_allowlist, self._ip_blocklist ) # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to @@ -809,14 +801,13 @@ class SimpleHttpClient(BaseHttpClient): use_proxy=use_proxy, ) - if self._ip_blacklist: - # If we have an IP blacklist, we then install the blacklisting Agent - # which prevents direct access to IP addresses, that are not caught - # by the DNS resolution. 
- self.agent = BlacklistingAgentWrapper( + if self._ip_blocklist: + # If we have an IP blocklist, we then install the Agent which prevents + # direct access to IP addresses, that are not caught by the DNS resolution. + self.agent = BlocklistingAgentWrapper( self.agent, - ip_blacklist=self._ip_blacklist, - ip_whitelist=self._ip_whitelist, + ip_blocklist=self._ip_blocklist, + ip_allowlist=self._ip_allowlist, ) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 8d7d0a3875..7e8cf31682 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -36,7 +36,7 @@ from twisted.web.iweb import IAgent, IAgentEndpointFactory, IBodyProducer, IResp from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http import proxyagent -from synapse.http.client import BlacklistingAgentWrapper, BlacklistingReactorWrapper +from synapse.http.client import BlocklistingAgentWrapper, BlocklistingReactorWrapper from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.http.federation.well_known_resolver import WellKnownResolver @@ -65,12 +65,12 @@ class MatrixFederationAgent: user_agent: The user agent header to use for federation requests. - ip_whitelist: Allowed IP addresses. + ip_allowlist: Allowed IP addresses. - ip_blacklist: Disallowed IP addresses. + ip_blocklist: Disallowed IP addresses. proxy_reactor: twisted reactor to use for connections to the proxy server - reactor might have some blacklisting applied (i.e. for DNS queries), + reactor might have some blocking applied (i.e. for DNS queries), but we need unblocked access to the proxy. 
_srv_resolver: @@ -87,17 +87,17 @@ class MatrixFederationAgent: reactor: ISynapseReactor, tls_client_options_factory: Optional[FederationPolicyForHTTPS], user_agent: bytes, - ip_whitelist: Optional[IPSet], - ip_blacklist: IPSet, + ip_allowlist: Optional[IPSet], + ip_blocklist: IPSet, _srv_resolver: Optional[SrvResolver] = None, _well_known_resolver: Optional[WellKnownResolver] = None, ): - # proxy_reactor is not blacklisted + # proxy_reactor is not blocklisting reactor proxy_reactor = reactor - # We need to use a DNS resolver which filters out blacklisted IP + # We need to use a DNS resolver which filters out blocked IP # addresses, to prevent DNS rebinding. - reactor = BlacklistingReactorWrapper(reactor, ip_whitelist, ip_blacklist) + reactor = BlocklistingReactorWrapper(reactor, ip_allowlist, ip_blocklist) self._clock = Clock(reactor) self._pool = HTTPConnectionPool(reactor) @@ -120,7 +120,7 @@ class MatrixFederationAgent: if _well_known_resolver is None: _well_known_resolver = WellKnownResolver( reactor, - agent=BlacklistingAgentWrapper( + agent=BlocklistingAgentWrapper( ProxyAgent( reactor, proxy_reactor, @@ -128,7 +128,7 @@ class MatrixFederationAgent: contextFactory=tls_client_options_factory, use_proxy=True, ), - ip_blacklist=ip_blacklist, + ip_blocklist=ip_blocklist, ), user_agent=self.user_agent, ) @@ -256,7 +256,7 @@ class MatrixHostnameEndpoint: Args: reactor: twisted reactor to use for underlying requests proxy_reactor: twisted reactor to use for connections to the proxy server. - 'reactor' might have some blacklisting applied (i.e. for DNS queries), + 'reactor' might have some blocking applied (i.e. for DNS queries), but we need unblocked access to the proxy. tls_client_options_factory: factory to use for fetching client tls options, or none to disable TLS. 
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 634882487c..9094dab0fe 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -64,7 +64,7 @@ from synapse.api.errors import ( from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( - BlacklistingAgentWrapper, + BlocklistingAgentWrapper, BodyExceededMaxSize, ByteWriteable, _make_scheduler, @@ -392,15 +392,15 @@ class MatrixFederationHttpClient: self.reactor, tls_client_options_factory, user_agent.encode("ascii"), - hs.config.server.federation_ip_range_whitelist, - hs.config.server.federation_ip_range_blacklist, + hs.config.server.federation_ip_range_allowlist, + hs.config.server.federation_ip_range_blocklist, ) - # Use a BlacklistingAgentWrapper to prevent circumventing the IP - # blacklist via IP literals in server names - self.agent = BlacklistingAgentWrapper( + # Use a BlocklistingAgentWrapper to prevent circumventing the IP + # blocking via IP literals in server names + self.agent = BlocklistingAgentWrapper( federation_agent, - ip_blacklist=hs.config.server.federation_ip_range_blacklist, + ip_blocklist=hs.config.server.federation_ip_range_blocklist, ) self.clock = hs.get_clock() diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index 94ef737b9e..7bdc4acae7 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -53,7 +53,7 @@ class ProxyAgent(_AgentBase): connections. proxy_reactor: twisted reactor to use for connections to the proxy server - reactor might have some blacklisting applied (i.e. for DNS queries), + reactor might have some blocking applied (i.e. for DNS queries), but we need unblocked access to the proxy. 
contextFactory: A factory for TLS contexts, to control the diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index c8a4a809f1..70b32cee17 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -105,7 +105,7 @@ class UrlPreviewer: When Synapse is asked to preview a URL it does the following: - 1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the + 1. Checks against a URL blocklist (defined as `url_preview_url_blacklist` in the config). 2. Checks the URL against an in-memory cache and returns the result if it exists. (This is also used to de-duplicate processing of multiple in-flight requests at once.) @@ -113,7 +113,7 @@ class UrlPreviewer: 1. Checks URL and timestamp against the database cache and returns the result if it has not expired and was successful (a 2xx return code). 2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it - does, update the URL to download. + does and the new URL is not blocked, update the URL to download. 3. Downloads the URL and stores it into a file via the media storage provider and saves the local media metadata. 4. If the media is an image: @@ -127,14 +127,14 @@ class UrlPreviewer: and saves the local media metadata. 2. Convert the oEmbed response to an Open Graph response. 3. Override any Open Graph data from the HTML with data from oEmbed. - 4. If an image exists in the Open Graph response: + 4. If an image URL exists in the Open Graph response: 1. Downloads the URL and stores it into a file via the media storage provider and saves the local media metadata. 2. Generates thumbnails. 3. Updates the Open Graph response based on image properties. - 6. If the media is JSON and an oEmbed URL was found: + 6. If an oEmbed URL was found and the media is JSON: 1. Convert the oEmbed response to an Open Graph response. - 2. If a thumbnail or image is in the oEmbed response: + 2. If an image URL is in the oEmbed response: 1. 
Downloads the URL and stores it into a file via the media storage provider and saves the local media metadata. 2. Generates thumbnails. @@ -144,7 +144,8 @@ class UrlPreviewer: If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole - does not fail. As much information as possible is returned. + does not fail. If any of them are blocked, then those additional requests + are skipped. As much information as possible is returned. The in-memory cache expires after 1 hour. @@ -166,8 +167,8 @@ class UrlPreviewer: self.client = SimpleHttpClient( hs, treq_args={"browser_like_redirects": True}, - ip_whitelist=hs.config.media.url_preview_ip_range_whitelist, - ip_blacklist=hs.config.media.url_preview_ip_range_blacklist, + ip_allowlist=hs.config.media.url_preview_ip_range_allowlist, + ip_blocklist=hs.config.media.url_preview_ip_range_blocklist, use_proxy=True, ) self.media_repo = media_repo @@ -185,7 +186,7 @@ class UrlPreviewer: or instance_running_jobs == hs.get_instance_name() ) - self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist + self.url_preview_url_blocklist = hs.config.media.url_preview_url_blocklist self.url_preview_accept_language = hs.config.media.url_preview_accept_language # memory cache mapping urls to an ObservableDeferred returning @@ -203,48 +204,14 @@ class UrlPreviewer: ) async def preview(self, url: str, user: UserID, ts: int) -> bytes: - # XXX: we could move this into _do_preview if we wanted. - url_tuple = urlsplit(url) - for entry in self.url_preview_url_blacklist: - match = True - for attrib in entry: - pattern = entry[attrib] - value = getattr(url_tuple, attrib) - logger.debug( - "Matching attrib '%s' with value '%s' against pattern '%s'", - attrib, - value, - pattern, - ) - - if value is None: - match = False - continue - - # Some attributes might not be parsed as strings by urlsplit (such as the - # port, which is parsed as an int). 
Because we use match functions that - # expect strings, we want to make sure that's what we give them. - value_str = str(value) - - if pattern.startswith("^"): - if not re.match(pattern, value_str): - match = False - continue - else: - if not fnmatch.fnmatch(value_str, pattern): - match = False - continue - if match: - logger.warning("URL %s blocked by url_blacklist entry %s", url, entry) - raise SynapseError( - 403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN - ) - # the in-memory cache: - # * ensures that only one request is active at a time + # * ensures that only one request to a URL is active at a time # * takes load off the DB for the thundering herds # * also caches any failures (unlike the DB) so we don't keep - # requesting the same endpoint + # requesting the same endpoint + # + # Note that autodiscovered oEmbed URLs and pre-caching of images + # are not captured in the in-memory cache. observable = self._cache.get(url) @@ -283,7 +250,7 @@ class UrlPreviewer: og = og.encode("utf8") return og - # If this URL can be accessed via oEmbed, use that instead. + # If this URL can be accessed via an allowed oEmbed, use that instead. url_to_download = url oembed_url = self._oembed.get_oembed_url(url) if oembed_url: @@ -329,6 +296,7 @@ class UrlPreviewer: # defer to that. oembed_url = self._oembed.autodiscover_from_html(tree) og_from_oembed: JsonDict = {} + # Only download to the oEmbed URL if it is allowed. if oembed_url: try: oembed_info = await self._handle_url( @@ -411,6 +379,59 @@ class UrlPreviewer: return jsonog.encode("utf8") + def _is_url_blocked(self, url: str) -> bool: + """ + Check whether the URL is allowed to be previewed (according to the homeserver + configuration). + + Args: + url: The requested URL. + + Return: + True if the URL is blocked, False if it is allowed. + """ + url_tuple = urlsplit(url) + for entry in self.url_preview_url_blocklist: + match = True + # Iterate over each entry. 
If *all* attributes of that entry match + # the current URL, then reject it. + for attrib, pattern in entry.items(): + value = getattr(url_tuple, attrib) + logger.debug( + "Matching attrib '%s' with value '%s' against pattern '%s'", + attrib, + value, + pattern, + ) + + if value is None: + match = False + break + + # Some attributes might not be parsed as strings by urlsplit (such as the + # port, which is parsed as an int). Because we use match functions that + # expect strings, we want to make sure that's what we give them. + value_str = str(value) + + # Check the value against the pattern as either a regular expression or + # a glob. If it doesn't match, the entry doesn't match. + if pattern.startswith("^"): + if not re.match(pattern, value_str): + match = False + break + else: + if not fnmatch.fnmatch(value_str, pattern): + match = False + break + + # All fields matched, return true (the URL is blocked). + if match: + logger.warning("URL %s blocked by entry %s", url, entry) + return match + + # No matches were found, the URL is allowed. + return False + async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult: """ Fetches a remote URL and parses the headers. @@ -451,7 +472,7 @@ class UrlPreviewer: except DNSLookupError: # DNS lookup returned no results # Note: This will also be the case if one of the resolved IP - # addresses is blacklisted + # addresses is blocked. raise SynapseError( 502, "DNS resolution failure during URL preview generation", @@ -547,8 +568,16 @@ class UrlPreviewer: Returns: A MediaInfo object describing the fetched content. + + Raises: + SynapseError if the URL is blocked. """ + if self._is_url_blocked(url): + raise SynapseError( + 403, "URL blocked by url pattern blocklist entry", Codes.UNKNOWN + ) + # TODO: we should probably honour robots.txt... except in practice # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? 
@@ -624,7 +653,7 @@ class UrlPreviewer: return # The image URL from the HTML might be relative to the previewed page, - # convert it to an URL which can be requested directly. + # convert it to a URL which can be requested directly. url_parts = urlparse(image_url) if url_parts.scheme != "data": image_url = urljoin(media_info.uri, image_url) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 2c9d181acf..0e9f366cba 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -134,7 +134,7 @@ from synapse.util.caches.descriptors import CachedFunction, cached as _cached from synapse.util.frozenutils import freeze if TYPE_CHECKING: - from synapse.app.generic_worker import GenericWorkerSlavedStore + from synapse.app.generic_worker import GenericWorkerStore from synapse.server import HomeServer @@ -237,9 +237,7 @@ class ModuleApi: # TODO: Fix this type hint once the types for the data stores have been ironed # out. - self._store: Union[ - DataStore, "GenericWorkerSlavedStore" - ] = hs.get_datastores().main + self._store: Union[DataStore, "GenericWorkerStore"] = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self._auth = hs.get_auth() self._auth_handler = auth_handler diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index e91ee05e99..50027680cb 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -143,7 +143,7 @@ class HttpPusher(Pusher): ) self.url = url - self.http_client = hs.get_proxied_blacklisted_http_client() + self.http_client = hs.get_proxied_blocklisted_http_client() self.data_minus_url = {} self.data_minus_url.update(self.data) del self.data_minus_url["url"] diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 200f667fdf..139f57cf86 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -60,7 +60,7 @@ _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 5 class 
ReplicationDataHandler: """Handles incoming stream updates from replication. - This instance notifies the slave data store about updates. Can be subclassed + This instance notifies the data store about updates. Can be subclassed to handle updates in additional ways. """ @@ -91,7 +91,7 @@ class ReplicationDataHandler: ) -> None: """Called to handle a batch of replication data with a given stream token. - By default this just pokes the slave store. Can be overridden in subclasses to + By default, this just pokes the data store. Can be overridden in subclasses to handle more. Args: diff --git a/synapse/rest/admin/devices.py b/synapse/rest/admin/devices.py index 3b2f2d9abb..11ebed9bfd 100644 --- a/synapse/rest/admin/devices.py +++ b/synapse/rest/admin/devices.py @@ -137,6 +137,35 @@ class DevicesRestServlet(RestServlet): devices = await self.device_handler.get_devices_by_user(target_user.to_string()) return HTTPStatus.OK, {"devices": devices, "total": len(devices)} + async def on_POST( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + """Creates a new device for the user.""" + await assert_requester_is_admin(self.auth, request) + + target_user = UserID.from_string(user_id) + if not self.is_mine(target_user): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Can only create devices for local users" + ) + + u = await self.store.get_user_by_id(target_user.to_string()) + if u is None: + raise NotFoundError("Unknown user") + + body = parse_json_object_from_request(request) + device_id = body.get("device_id") + if not device_id: + raise SynapseError(HTTPStatus.BAD_REQUEST, "Missing device_id") + if not isinstance(device_id, str): + raise SynapseError(HTTPStatus.BAD_REQUEST, "device_id must be a string") + + await self.device_handler.check_device_registered( + user_id=user_id, device_id=device_id + ) + + return HTTPStatus.CREATED, {} + class DeleteDevicesRestServlet(RestServlet): """ diff --git a/synapse/rest/client/login.py 
b/synapse/rest/client/login.py index a348720131..afdbf821b5 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -87,11 +87,6 @@ class LoginRestServlet(RestServlet): # JWT configuration variables. self.jwt_enabled = hs.config.jwt.jwt_enabled - self.jwt_secret = hs.config.jwt.jwt_secret - self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim - self.jwt_algorithm = hs.config.jwt.jwt_algorithm - self.jwt_issuer = hs.config.jwt.jwt_issuer - self.jwt_audiences = hs.config.jwt.jwt_audiences # SSO configuration. self.saml2_enabled = hs.config.saml2.saml2_enabled @@ -427,7 +422,7 @@ class LoginRestServlet(RestServlet): self, login_submission: JsonDict, should_issue_refresh_token: bool = False ) -> LoginResponse: """ - Handle the final stage of SSO login. + Handle token login. Args: login_submission: The JSON request body. @@ -452,72 +447,24 @@ class LoginRestServlet(RestServlet): async def _do_jwt_login( self, login_submission: JsonDict, should_issue_refresh_token: bool = False ) -> LoginResponse: - token = login_submission.get("token", None) - if token is None: - raise LoginError( - 403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN - ) - - from authlib.jose import JsonWebToken, JWTClaims - from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError - - jwt = JsonWebToken([self.jwt_algorithm]) - claim_options = {} - if self.jwt_issuer is not None: - claim_options["iss"] = {"value": self.jwt_issuer, "essential": True} - if self.jwt_audiences is not None: - claim_options["aud"] = {"values": self.jwt_audiences, "essential": True} - - try: - claims = jwt.decode( - token, - key=self.jwt_secret, - claims_cls=JWTClaims, - claims_options=claim_options, - ) - except BadSignatureError: - # We handle this case separately to provide a better error message - raise LoginError( - 403, - "JWT validation failed: Signature verification failed", - errcode=Codes.FORBIDDEN, - ) - except JoseError as e: - # A JWT error occurred, 
return some info back to the client. - raise LoginError( - 403, - "JWT validation failed: %s" % (str(e),), - errcode=Codes.FORBIDDEN, - ) - - try: - claims.validate(leeway=120) # allows 2 min of clock skew - - # Enforce the old behavior which is rolled out in productive - # servers: if the JWT contains an 'aud' claim but none is - # configured, the login attempt will fail - if claims.get("aud") is not None: - if self.jwt_audiences is None or len(self.jwt_audiences) == 0: - raise InvalidClaimError("aud") - except JoseError as e: - raise LoginError( - 403, - "JWT validation failed: %s" % (str(e),), - errcode=Codes.FORBIDDEN, - ) + """ + Handle the custom JWT login. - user = claims.get(self.jwt_subject_claim, None) - if user is None: - raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) + Args: + login_submission: The JSON request body. + should_issue_refresh_token: True if this login should issue + a refresh token alongside the access token. - user_id = UserID(user, self.hs.hostname).to_string() - result = await self._complete_login( + Returns: + The body of the JSON response. + """ + user_id = await self.hs.get_jwt_handler().validate_login(login_submission) + return await self._complete_login( user_id, login_submission, create_non_existent_users=True, should_issue_refresh_token=should_issue_refresh_token, ) - return result def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict: diff --git a/synapse/rest/client/mutual_rooms.py b/synapse/rest/client/mutual_rooms.py index 38ef4e459f..c99445da30 100644 --- a/synapse/rest/client/mutual_rooms.py +++ b/synapse/rest/client/mutual_rooms.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Tuple +from http import HTTPStatus +from typing import TYPE_CHECKING, Dict, List, Tuple from synapse.api.errors import Codes, SynapseError from synapse.http.server import HttpServer -from synapse.http.servlet import RestServlet +from synapse.http.servlet import RestServlet, parse_strings_from_args from synapse.http.site import SynapseRequest -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict from ._base import client_patterns @@ -30,11 +31,11 @@ logger = logging.getLogger(__name__) class UserMutualRoomsServlet(RestServlet): """ - GET /uk.half-shot.msc2666/user/mutual_rooms/{user_id} HTTP/1.1 + GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id} HTTP/1.1 """ PATTERNS = client_patterns( - "/uk.half-shot.msc2666/user/mutual_rooms/(?P<user_id>[^/]*)", + "/uk.half-shot.msc2666/user/mutual_rooms$", releases=(), # This is an unstable feature ) @@ -43,17 +44,35 @@ class UserMutualRoomsServlet(RestServlet): self.auth = hs.get_auth() self.store = hs.get_datastores().main - async def on_GET( - self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: - UserID.from_string(user_id) + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + # twisted.web.server.Request.args is incorrectly defined as Optional[Any] + args: Dict[bytes, List[bytes]] = request.args # type: ignore + + user_ids = parse_strings_from_args(args, "user_id", required=True) + + if len(user_ids) > 1: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Duplicate user_id query parameter", + errcode=Codes.INVALID_PARAM, + ) + + # We don't do batching, so a batch token is illegal by default + if b"batch_token" in args: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Unknown batch_token", + errcode=Codes.INVALID_PARAM, + ) + + user_id = user_ids[0] requester = await self.auth.get_user_by_req(request) if user_id == requester.user.to_string(): raise SynapseError( - code=400, - msg="You cannot 
request a list of shared rooms with yourself", - errcode=Codes.FORBIDDEN, + HTTPStatus.UNPROCESSABLE_ENTITY, + "You cannot request a list of shared rooms with yourself", + errcode=Codes.INVALID_PARAM, ) rooms = await self.store.get_mutual_rooms_between_users( diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 58c5b07390..32df054f56 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -91,7 +91,7 @@ class VersionsRestServlet(RestServlet): # Implements additional endpoints as described in MSC2432 "org.matrix.msc2432": True, # Implements additional endpoints as described in MSC2666 - "uk.half-shot.msc2666.mutual_rooms": True, + "uk.half-shot.msc2666.query_mutual_rooms": True, # Whether new rooms will be set to encrypted or not (based on presets). "io.element.e2ee_forced.public": self.e2ee_forced_public, "io.element.e2ee_forced.private": self.e2ee_forced_private, diff --git a/synapse/server.py b/synapse/server.py index b307295789..f6e245569c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -147,6 +147,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: from txredisapi import ConnectionHandler + from synapse.handlers.jwt import JwtHandler from synapse.handlers.oidc import OidcHandler from synapse.handlers.saml import SamlHandler @@ -453,15 +454,15 @@ class HomeServer(metaclass=abc.ABCMeta): return SimpleHttpClient(self, use_proxy=True) @cache_in_self - def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: + def get_proxied_blocklisted_http_client(self) -> SimpleHttpClient: """ - An HTTP client that uses configured HTTP(S) proxies and blacklists IPs - based on the IP range blacklist/whitelist. + An HTTP client that uses configured HTTP(S) proxies and blocks IPs + based on the configured IP ranges. 
""" return SimpleHttpClient( self, - ip_whitelist=self.config.server.ip_range_whitelist, - ip_blacklist=self.config.server.ip_range_blacklist, + ip_allowlist=self.config.server.ip_range_allowlist, + ip_blocklist=self.config.server.ip_range_blocklist, use_proxy=True, ) @@ -534,6 +535,12 @@ class HomeServer(metaclass=abc.ABCMeta): return SsoHandler(self) @cache_in_self + def get_jwt_handler(self) -> "JwtHandler": + from synapse.handlers.jwt import JwtHandler + + return JwtHandler(self) + + @cache_in_self def get_sync_handler(self) -> SyncHandler: return SyncHandler(self) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 6031095249..9bc0c3b7b9 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -45,6 +45,7 @@ from synapse.events.snapshot import ( UnpersistedEventContextBase, ) from synapse.logging.context import ContextResourceUsage +from synapse.logging.opentracing import tag_args, trace from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour @@ -270,6 +271,8 @@ class StateHandler: state = await entry.get_state(self._state_storage_controller, StateFilter.all()) return await self.store.get_joined_hosts(room_id, state, entry) + @trace + @tag_args async def calculate_context_info( self, event: EventBase, @@ -465,6 +468,7 @@ class StateHandler: return await unpersisted_context.persist(event) + @trace @measure_func() async def resolve_state_groups_for_events( self, room_id: str, event_ids: Collection[str], await_full_state: bool = True diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 9d7a8a792f..06a80869eb 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -67,6 +67,8 @@ class StateStorageController: """ self._partial_state_room_tracker.notify_un_partial_stated(room_id) + @trace + @tag_args async def 
get_state_group_delta( self, state_group: int ) -> Tuple[Optional[int], Optional[StateMap[str]]]: @@ -84,6 +86,8 @@ class StateStorageController: state_group_delta = await self.stores.state.get_state_group_delta(state_group) return state_group_delta.prev_group, state_group_delta.delta_ids + @trace + @tag_args async def get_state_groups_ids( self, _room_id: str, event_ids: Collection[str], await_full_state: bool = True ) -> Dict[int, MutableStateMap[str]]: @@ -114,6 +118,8 @@ class StateStorageController: return group_to_state + @trace + @tag_args async def get_state_ids_for_group( self, state_group: int, state_filter: Optional[StateFilter] = None ) -> StateMap[str]: @@ -130,6 +136,8 @@ class StateStorageController: return group_to_state[state_group] + @trace + @tag_args async def get_state_groups( self, room_id: str, event_ids: Collection[str] ) -> Dict[int, List[EventBase]]: @@ -165,6 +173,8 @@ class StateStorageController: for group, event_id_map in group_to_ids.items() } + @trace + @tag_args def _get_state_groups_from_groups( self, groups: List[int], state_filter: StateFilter ) -> Awaitable[Dict[int, StateMap[str]]]: @@ -183,6 +193,7 @@ class StateStorageController: return self.stores.state._get_state_groups_from_groups(groups, state_filter) @trace + @tag_args async def get_state_for_events( self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None ) -> Dict[str, StateMap[EventBase]]: @@ -280,6 +291,8 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} + @trace + @tag_args async def get_state_for_event( self, event_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[EventBase]: @@ -303,6 +316,7 @@ class StateStorageController: return state_map[event_id] @trace + @tag_args async def get_state_ids_for_event( self, event_id: str, @@ -333,6 +347,8 @@ class StateStorageController: ) return state_map[event_id] + @trace + @tag_args def get_state_for_groups( self, groups: Iterable[int], 
state_filter: Optional[StateFilter] = None ) -> Awaitable[Dict[int, MutableStateMap[str]]]: @@ -402,6 +418,8 @@ class StateStorageController: event_id, room_id, prev_group, delta_ids, current_state_ids ) + @trace + @tag_args @cancellable async def get_current_state_ids( self, @@ -442,6 +460,8 @@ class StateStorageController: room_id, on_invalidate=on_invalidate ) + @trace + @tag_args async def get_canonical_alias_for_room(self, room_id: str) -> Optional[str]: """Get canonical alias for room, if any @@ -466,6 +486,8 @@ class StateStorageController: return event.content.get("canonical_alias") + @trace + @tag_args async def get_current_state_deltas( self, prev_stream_id: int, max_stream_id: int ) -> Tuple[int, List[Dict[str, Any]]]: @@ -500,6 +522,7 @@ class StateStorageController: ) @trace + @tag_args async def get_current_state( self, room_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[EventBase]: @@ -516,6 +539,8 @@ class StateStorageController: return state_map + @trace + @tag_args async def get_current_state_event( self, room_id: str, event_type: str, state_key: str ) -> Optional[EventBase]: @@ -527,6 +552,8 @@ class StateStorageController: ) return state_map.get(key) + @trace + @tag_args async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]: """Get current hosts in room based on current state. @@ -538,6 +565,8 @@ class StateStorageController: return await self.stores.main.get_current_hosts_in_room(room_id) + @trace + @tag_args async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: """Get current hosts in room based on current state. 
@@ -553,6 +582,8 @@ class StateStorageController: return await self.stores.main.get_current_hosts_in_room_ordered(room_id) + @trace + @tag_args async def get_current_hosts_in_room_or_partial_state_approximation( self, room_id: str ) -> Collection[str]: @@ -582,6 +613,8 @@ class StateStorageController: return hosts + @trace + @tag_args async def get_users_in_room_with_profiles( self, room_id: str ) -> Mapping[str, ProfileInfo]: diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py index 988e44c6af..2a03528fee 100644 --- a/synapse/storage/controllers/stats.py +++ b/synapse/storage/controllers/stats.py @@ -13,8 +13,7 @@ # limitations under the License. import logging -from collections import Counter -from typing import TYPE_CHECKING, Collection, List, Tuple +from typing import TYPE_CHECKING, Collection, Counter, List, Tuple from synapse.api.errors import SynapseError from synapse.storage.database import LoggingTransaction diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 313cf1a8d0..bdaa508dbe 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -565,9 +565,8 @@ class DatabasePool: # A set of tables that are not safe to use native upserts in. self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) - # We add the user_directory_search table to the blacklist on SQLite - # because the existing search table does not have an index, making it - # unsafe to use native upserts. + # The user_directory_search table is unsafe to use native upserts + # on SQLite because the existing search table does not have an index. 
if isinstance(self.engine, Sqlite3Engine): self._unsafe_to_upsert_tables.add("user_directory_search") diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index a9843f6e17..8f7bdbc61a 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -85,13 +85,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) writers=hs.config.worker.writers.account_data, ) else: + # Multiple writers are not supported for SQLite. + # # We shouldn't be running in worker mode with SQLite, but its useful # to support it for unit tests. - # - # If this process is the writer than we need to use - # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets - # updated over replication. (Multiple writers are not supported for - # SQLite). self._account_data_id_gen = StreamIdGenerator( db_conn, hs.get_replication_notifier(), diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index bd07d20171..46fa0a73f9 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -274,11 +274,11 @@ class CacheInvalidationWorkerStore(SQLBaseStore): async def invalidate_cache_and_stream( self, cache_name: str, keys: Tuple[Any, ...] ) -> None: - """Invalidates the cache and adds it to the cache stream so slaves + """Invalidates the cache and adds it to the cache stream so other workers will know to invalidate their caches. - This should only be used to invalidate caches where slaves won't - otherwise know from other replication streams that the cache should + This should only be used to invalidate caches where other workers won't + otherwise have known from other replication streams that the cache should be invalidated. 
""" cache_func = getattr(self, cache_name, None) @@ -297,11 +297,11 @@ class CacheInvalidationWorkerStore(SQLBaseStore): cache_func: CachedFunction, keys: Tuple[Any, ...], ) -> None: - """Invalidates the cache and adds it to the cache stream so slaves + """Invalidates the cache and adds it to the cache stream so other workers will know to invalidate their caches. - This should only be used to invalidate caches where slaves won't - otherwise know from other replication streams that the cache should + This should only be used to invalidate caches where other workers won't + otherwise have known from other replication streams that the cache should be invalidated. """ txn.call_after(cache_func.invalidate, keys) @@ -310,7 +310,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): def _invalidate_all_cache_and_stream( self, txn: LoggingTransaction, cache_func: CachedFunction ) -> None: - """Invalidates the entire cache and adds it to the cache stream so slaves + """Invalidates the entire cache and adds it to the cache stream so other workers will know to invalidate their caches. """ diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5503621ad6..a67fdb3c22 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -105,8 +105,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): is_writer=hs.config.worker.worker_app is None, ) - # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a - # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker). 
device_list_max = self._device_list_id_gen.get_current_token() device_list_prefill, min_device_list_id = self.db_pool.get_cache_dict( db_conn, diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 0ff3fc7369..a39bc90974 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -213,13 +213,10 @@ class EventsWorkerStore(SQLBaseStore): writers=hs.config.worker.writers.events, ) else: + # Multiple writers are not supported for SQLite. + # # We shouldn't be running in worker mode with SQLite, but its useful # to support it for unit tests. - # - # If this process is the writer than we need to use - # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets - # updated over replication. (Multiple writers are not supported for - # SQLite). self._stream_id_gen = StreamIdGenerator( db_conn, hs.get_replication_notifier(), @@ -1976,12 +1973,6 @@ class EventsWorkerStore(SQLBaseStore): return rows, to_token, True - async def is_event_after(self, event_id1: str, event_id2: str) -> bool: - """Returns True if event_id1 is after event_id2 in the stream""" - to_1, so_1 = await self.get_event_ordering(event_id1) - to_2, so_2 = await self.get_event_ordering(event_id2) - return (to_1, so_1) > (to_2, so_2) - @cached(max_entries=5000) async def get_event_ordering(self, event_id: str) -> Tuple[int, int]: res = await self.db_pool.simple_select_one( diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py index 50516402f9..da31eb44dc 100644 --- a/synapse/storage/databases/main/filtering.py +++ b/synapse/storage/databases/main/filtering.py @@ -25,6 +25,7 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) +from synapse.storage.engines import PostgresEngine from synapse.types import JsonDict, UserID from synapse.util.caches.descriptors import cached @@ -40,6 +41,8 @@ class 
FilteringWorkerStore(SQLBaseStore): hs: "HomeServer", ): super().__init__(database, db_conn, hs) + self.server_name: str = hs.hostname + self.database_engine = database.engine self.db_pool.updates.register_background_index_update( "full_users_filters_unique_idx", index_name="full_users_unique_idx", @@ -48,6 +51,98 @@ class FilteringWorkerStore(SQLBaseStore): unique=True, ) + self.db_pool.updates.register_background_update_handler( + "populate_full_user_id_user_filters", + self.populate_full_user_id_user_filters, + ) + + async def populate_full_user_id_user_filters( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Background update to populate the column `full_user_id` of the table + user_filters from entries in the column `user_local_part` of the same table + """ + + lower_bound_id = progress.get("lower_bound_id", "") + + def _get_last_id(txn: LoggingTransaction) -> Optional[str]: + sql = """ + SELECT user_id FROM user_filters + WHERE user_id > ? + ORDER BY user_id + LIMIT 1 OFFSET 50 + """ + txn.execute(sql, (lower_bound_id,)) + res = txn.fetchone() + if res: + upper_bound_id = res[0] + return upper_bound_id + else: + return None + + def _process_batch( + txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str + ) -> None: + sql = """ + UPDATE user_filters + SET full_user_id = '@' || user_id || ? + WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL + """ + txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id)) + + def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None: + sql = """ + UPDATE user_filters + SET full_user_id = '@' || user_id || ? + WHERE ? 
< user_id AND full_user_id IS NULL + """ + txn.execute( + sql, + ( + f":{self.server_name}", + lower_bound_id, + ), + ) + + if isinstance(self.database_engine, PostgresEngine): + sql = """ + ALTER TABLE user_filters VALIDATE CONSTRAINT full_user_id_not_null + """ + txn.execute(sql) + + upper_bound_id = await self.db_pool.runInteraction( + "populate_full_user_id_user_filters", _get_last_id + ) + + if upper_bound_id is None: + await self.db_pool.runInteraction( + "populate_full_user_id_user_filters", _final_batch, lower_bound_id + ) + + await self.db_pool.updates._end_background_update( + "populate_full_user_id_user_filters" + ) + return 1 + + await self.db_pool.runInteraction( + "populate_full_user_id_user_filters", + _process_batch, + lower_bound_id, + upper_bound_id, + ) + + progress["lower_bound_id"] = upper_bound_id + + await self.db_pool.runInteraction( + "populate_full_user_id_user_filters", + self.db_pool.updates._background_update_progress_txn, + "populate_full_user_id_user_filters", + progress, + ) + + return 50 + @cached(num_args=2) async def get_user_filter( self, user_localpart: str, filter_id: Union[int, str] diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index 14294a0bb8..595e22982e 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -248,89 +248,6 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): (count,) = cast(Tuple[int], txn.fetchone()) return count - async def count_r30_users(self) -> Dict[str, int]: - """ - Counts the number of 30 day retained users, defined as:- - * Users who have created their accounts more than 30 days ago - * Where last seen at most 30 days ago - * Where account creation and last_seen are > 30 days apart - - Returns: - A mapping of counts globally as well as broken out by platform. 
- """ - - def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]: - thirty_days_in_secs = 86400 * 30 - now = int(self._clock.time()) - thirty_days_ago_in_secs = now - thirty_days_in_secs - - sql = """ - SELECT platform, COUNT(*) FROM ( - SELECT - users.name, platform, users.creation_ts * 1000, - MAX(uip.last_seen) - FROM users - INNER JOIN ( - SELECT - user_id, - last_seen, - CASE - WHEN user_agent LIKE '%%Android%%' THEN 'android' - WHEN user_agent LIKE '%%iOS%%' THEN 'ios' - WHEN user_agent LIKE '%%Electron%%' THEN 'electron' - WHEN user_agent LIKE '%%Mozilla%%' THEN 'web' - WHEN user_agent LIKE '%%Gecko%%' THEN 'web' - ELSE 'unknown' - END - AS platform - FROM user_ips - ) uip - ON users.name = uip.user_id - AND users.appservice_id is NULL - AND users.creation_ts < ? - AND uip.last_seen/1000 > ? - AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 - GROUP BY users.name, platform, users.creation_ts - ) u GROUP BY platform - """ - - results = {} - txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - - for row in txn: - if row[0] == "unknown": - pass - results[row[0]] = row[1] - - sql = """ - SELECT COUNT(*) FROM ( - SELECT users.name, users.creation_ts * 1000, - MAX(uip.last_seen) - FROM users - INNER JOIN ( - SELECT - user_id, - last_seen - FROM user_ips - ) uip - ON users.name = uip.user_id - AND appservice_id is NULL - AND users.creation_ts < ? - AND uip.last_seen/1000 > ? 
- AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 - GROUP BY users.name, users.creation_ts - ) u - """ - - txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - - (count,) = cast(Tuple[int], txn.fetchone()) - results["all"] = count - - return results - - return await self.db_pool.runInteraction("count_r30_users", _count_r30_users) - async def count_r30v2_users(self) -> Dict[str, int]: """ Counts the number of 30 day retained users, defined as users that: diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index c4022d2427..65c92bef51 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -15,9 +15,14 @@ from typing import TYPE_CHECKING, Optional from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.main.roommember import ProfileInfo -from synapse.types import UserID +from synapse.storage.engines import PostgresEngine +from synapse.types import JsonDict, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -31,6 +36,8 @@ class ProfileWorkerStore(SQLBaseStore): hs: "HomeServer", ): super().__init__(database, db_conn, hs) + self.server_name: str = hs.hostname + self.database_engine = database.engine self.db_pool.updates.register_background_index_update( "profiles_full_user_id_key_idx", index_name="profiles_full_user_id_key", @@ -39,6 +46,97 @@ class ProfileWorkerStore(SQLBaseStore): unique=True, ) + self.db_pool.updates.register_background_update_handler( + "populate_full_user_id_profiles", self.populate_full_user_id_profiles + ) + + async def populate_full_user_id_profiles( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Background update to populate the column 
`full_user_id` of the table + profiles from entries in the column `user_local_part` of the same table + """ + + lower_bound_id = progress.get("lower_bound_id", "") + + def _get_last_id(txn: LoggingTransaction) -> Optional[str]: + sql = """ + SELECT user_id FROM profiles + WHERE user_id > ? + ORDER BY user_id + LIMIT 1 OFFSET 50 + """ + txn.execute(sql, (lower_bound_id,)) + res = txn.fetchone() + if res: + upper_bound_id = res[0] + return upper_bound_id + else: + return None + + def _process_batch( + txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str + ) -> None: + sql = """ + UPDATE profiles + SET full_user_id = '@' || user_id || ? + WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL + """ + txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id)) + + def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None: + sql = """ + UPDATE profiles + SET full_user_id = '@' || user_id || ? + WHERE ? < user_id AND full_user_id IS NULL + """ + txn.execute( + sql, + ( + f":{self.server_name}", + lower_bound_id, + ), + ) + + if isinstance(self.database_engine, PostgresEngine): + sql = """ + ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null + """ + txn.execute(sql) + + upper_bound_id = await self.db_pool.runInteraction( + "populate_full_user_id_profiles", _get_last_id + ) + + if upper_bound_id is None: + await self.db_pool.runInteraction( + "populate_full_user_id_profiles", _final_batch, lower_bound_id + ) + + await self.db_pool.updates._end_background_update( + "populate_full_user_id_profiles" + ) + return 1 + + await self.db_pool.runInteraction( + "populate_full_user_id_profiles", + _process_batch, + lower_bound_id, + upper_bound_id, + ) + + progress["lower_bound_id"] = upper_bound_id + + await self.db_pool.runInteraction( + "populate_full_user_id_profiles", + self.db_pool.updates._background_update_progress_txn, + "populate_full_user_id_profiles", + progress, + ) + + return 50 + async def get_profileinfo(self, 
user_localpart: str) -> ProfileInfo: try: profile = await self.db_pool.simple_select_one( diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 074942b167..5ee5c7ad9f 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -85,13 +85,10 @@ class ReceiptsWorkerStore(SQLBaseStore): else: self._can_write_to_receipts = True + # Multiple writers are not supported for SQLite. + # # We shouldn't be running in worker mode with SQLite, but its useful # to support it for unit tests. - # - # If this process is the writer than we need to use - # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets - # updated over replication. (Multiple writers are not supported for - # SQLite). self._receipts_id_gen = StreamIdGenerator( db_conn, hs.get_replication_notifier(), diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 097dea5182..86eb1a8a08 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union +from synapse.logging.opentracing import tag_args, trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -40,6 +41,8 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): updates. 
""" + @trace + @tag_args def _count_state_group_hops_txn( self, txn: LoggingTransaction, state_group: int ) -> int: @@ -83,6 +86,8 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): return count + @trace + @tag_args def _get_state_groups_from_groups_txn( self, txn: LoggingTransaction, diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 29ff64e876..6984d11352 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -20,6 +20,7 @@ import attr from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.events.snapshot import UnpersistedEventContext, UnpersistedEventContextBase +from synapse.logging.opentracing import tag_args, trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -159,6 +160,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): "get_state_group_delta", _get_state_group_delta_txn ) + @trace + @tag_args @cancellable async def _get_state_groups_from_groups( self, groups: List[int], state_filter: StateFilter @@ -187,6 +190,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return results + @trace + @tag_args def _get_state_for_group_using_cache( self, cache: DictionaryCache[int, StateKey, str], @@ -239,6 +244,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return state_filter.filter_state(state_dict_ids), not missing_types + @trace + @tag_args @cancellable async def _get_state_for_groups( self, groups: Iterable[int], state_filter: Optional[StateFilter] = None @@ -305,6 +312,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return state + @trace + @tag_args def _get_state_for_groups_using_cache( self, groups: Iterable[int], @@ -403,6 +412,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): fetched_keys=non_member_types, ) + @trace + @tag_args async def 
store_state_deltas_for_batched( self, events_and_context: List[Tuple[EventBase, UnpersistedEventContextBase]], @@ -520,6 +531,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): prev_group, ) + @trace + @tag_args async def store_state_group( self, event_id: str, @@ -772,6 +785,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): ((sg,) for sg in state_groups_to_delete), ) + @trace + @tag_args async def get_previous_state_groups( self, state_groups: Iterable[int] ) -> Dict[int, int]: diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 1672976209..df2cc31ca6 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 76 # remember to update the list below when updating +SCHEMA_VERSION = 77 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -100,13 +100,19 @@ Changes in SCHEMA_VERSION = 75: Changes in SCHEMA_VERSION = 76: - Adds a full_user_id column to tables profiles and user_filters. + +Changes in SCHEMA_VERSION = 77 + - (Postgres) Add NOT VALID CHECK (full_user_id IS NOT NULL) to tables profiles and user_filters """ SCHEMA_COMPAT_VERSION = ( # Queries against `event_stream_ordering` columns in membership tables must # be disambiguated. 
- 74 + # + # insertions to the column `full_user_id` of tables profiles and user_filters can no + # longer be null + 76 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/34/cache_stream.py b/synapse/storage/schema/main/delta/34/cache_stream.py index 682c86da1a..882f9b893b 100644 --- a/synapse/storage/schema/main/delta/34/cache_stream.py +++ b/synapse/storage/schema/main/delta/34/cache_stream.py @@ -21,7 +21,7 @@ from synapse.storage.prepare_database import get_statements logger = logging.getLogger(__name__) -# This stream is used to notify replication slaves that some caches have +# This stream is used to notify workers over replication that some caches have # been invalidated that they cannot infer from the other streams. CREATE_TABLE = """ CREATE TABLE cache_invalidation_stream ( diff --git a/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres b/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres new file mode 100644 index 0000000000..3eb226c648 --- /dev/null +++ b/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres @@ -0,0 +1,16 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres b/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres new file mode 100644 index 0000000000..ba037daf47 --- /dev/null +++ b/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres @@ -0,0 +1,16 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql b/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql new file mode 100644 index 0000000000..12101ab914 --- /dev/null +++ b/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql @@ -0,0 +1,16 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7703, 'populate_full_user_id_profiles', '{}'); \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql b/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql new file mode 100644 index 0000000000..1f4d683cac --- /dev/null +++ b/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql @@ -0,0 +1,16 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7704, 'populate_full_user_id_user_filters', '{}'); \ No newline at end of file diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 325219656a..42baf8ac6b 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -84,7 +84,15 @@ JsonSerializable = object # Collection[str] that does not include str itself; str being a Sequence[str] # is very misleading and results in bugs. +# +# StrCollection is an unordered collection of strings. If ordering is important, +# StrSequence can be used instead. StrCollection = Union[Tuple[str, ...], List[str], AbstractSet[str]] +# Sequence[str] that does not include str itself; str being a Sequence[str] +# is very misleading and results in bugs. +# +# Unlike StrCollection, StrSequence is an ordered collection of strings. +StrSequence = Union[Tuple[str, ...], List[str]] # Note that this seems to require inheriting *directly* from Interface in order diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index 5a638c6e9a..e3a54df48b 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -14,17 +14,17 @@ import importlib import importlib.util -import itertools from types import ModuleType -from typing import Any, Iterable, Tuple, Type +from typing import Any, Tuple, Type import jsonschema from synapse.config._base import ConfigError from synapse.config._util import json_error_to_config_error +from synapse.types import StrSequence -def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]: +def load_module(provider: dict, config_path: StrSequence) -> Tuple[Type, Any]: """Loads a synapse module with its config Args: @@ -39,9 +39,7 @@ def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]: modulename = provider.get("module") if not isinstance(modulename, str): - raise ConfigError( - "expected a string", 
path=itertools.chain(config_path, ("module",)) - ) + raise ConfigError("expected a string", path=tuple(config_path) + ("module",)) # We need to import the module, and then pick the class out of # that, so we split based on the last dot. @@ -55,19 +53,17 @@ def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]: try: provider_config = provider_class.parse_config(module_config) except jsonschema.ValidationError as e: - raise json_error_to_config_error( - e, itertools.chain(config_path, ("config",)) - ) + raise json_error_to_config_error(e, tuple(config_path) + ("config",)) except ConfigError as e: raise _wrap_config_error( "Failed to parse config for module %r" % (modulename,), - prefix=itertools.chain(config_path, ("config",)), + prefix=tuple(config_path) + ("config",), e=e, ) except Exception as e: raise ConfigError( "Failed to parse config for module %r" % (modulename,), - path=itertools.chain(config_path, ("config",)), + path=tuple(config_path) + ("config",), ) from e else: provider_config = module_config @@ -92,9 +88,7 @@ def load_python_module(location: str) -> ModuleType: return mod -def _wrap_config_error( - msg: str, prefix: Iterable[str], e: ConfigError -) -> "ConfigError": +def _wrap_config_error(msg: str, prefix: StrSequence, e: ConfigError) -> "ConfigError": """Wrap a relative ConfigError with a new path This is useful when we have a ConfigError with a relative path due to a problem @@ -102,7 +96,7 @@ def _wrap_config_error( """ path = prefix if e.path: - path = itertools.chain(prefix, e.path) + path = tuple(prefix) + tuple(e.path) e1 = ConfigError(msg, path) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index f262bf95a0..2ad55ac13e 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -25,10 +25,12 @@ from typing import ( Iterator, List, Mapping, + MutableSet, Optional, Set, Tuple, ) +from weakref import WeakSet from prometheus_client.core import Counter from 
typing_extensions import ContextManager @@ -86,7 +88,9 @@ queue_wait_timer = Histogram( ) -_rate_limiter_instances: Set["FederationRateLimiter"] = set() +# This must be a `WeakSet`, otherwise we indirectly hold on to entire `HomeServer`s +# during trial test runs and leak a lot of memory. +_rate_limiter_instances: MutableSet["FederationRateLimiter"] = WeakSet() # Protects the _rate_limiter_instances set from concurrent access _rate_limiter_instances_lock = threading.Lock() diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py index 222449baac..aa6af5ad7b 100644 --- a/tests/api/test_filtering.py +++ b/tests/api/test_filtering.py @@ -48,8 +48,6 @@ class FilteringTestCase(unittest.HomeserverTestCase): invalid_filters: List[JsonDict] = [ # `account_data` must be a dictionary {"account_data": "Hello World"}, - # `event_fields` entries must not contain backslashes - {"event_fields": [r"\\foo"]}, # `event_format` must be "client" or "federation" {"event_format": "other"}, # `not_rooms` must contain valid room IDs @@ -114,10 +112,6 @@ class FilteringTestCase(unittest.HomeserverTestCase): "event_format": "client", "event_fields": ["type", "content", "sender"], }, - # a single backslash should be permitted (though it is debatable whether - # it should be permitted before anything other than `.`, and what that - # actually means) - # # (note that event_fields is implemented in # synapse.events.utils.serialize_event, and so whether this actually works # is tested elsewhere. 
We just want to check that it is allowed through the diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py index 2ee343d8a4..6e0413400e 100644 --- a/tests/app/test_openid_listener.py +++ b/tests/app/test_openid_listener.py @@ -38,7 +38,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase): def default_config(self) -> JsonDict: conf = super().default_config() - # we're using FederationReaderServer, which uses a SlavedStore, so we + # we're using GenericWorkerServer, which uses a GenericWorkerStore, so we # have to tell the FederationHandler not to try to access stuff that is only # in the primary store. conf["worker_app"] = "yes" diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py index a860eedbcf..9305b758d7 100644 --- a/tests/app/test_phone_stats_home.py +++ b/tests/app/test_phone_stats_home.py @@ -4,7 +4,6 @@ from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.util import Clock -from tests import unittest from tests.server import ThreadedMemoryReactorClock from tests.unittest import HomeserverTestCase @@ -12,154 +11,6 @@ FIVE_MINUTES_IN_SECONDS = 300 ONE_DAY_IN_SECONDS = 86400 -class PhoneHomeTestCase(HomeserverTestCase): - servlets = [ - synapse.rest.admin.register_servlets_for_client_rest_resource, - room.register_servlets, - login.register_servlets, - ] - - # Override the retention time for the user_ips table because otherwise it - # gets pruned too aggressively for our R30 test. - @unittest.override_config({"user_ips_max_age": "365d"}) - def test_r30_minimum_usage(self) -> None: - """ - Tests the minimum amount of interaction necessary for the R30 metric - to consider a user 'retained'. 
- """ - - # Register a user, log it in, create a room and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the R30 results do not count that user. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Advance 30 days (+ 1 second, because strict inequality causes issues if we are - # bang on 30 days later). - self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1) - - # (Make sure the user isn't somehow counted by this point.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Send a message (this counts as activity) - self.helper.send(room_id, "message2", tok=access_token) - - # We have to wait some time for _update_client_ips_batch to get - # called and update the user_ips table. - self.reactor.advance(2 * 60 * 60) - - # *Now* the user is counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance 29 days. The user has now not posted for 29 days. - self.reactor.advance(29 * ONE_DAY_IN_SECONDS) - - # The user is still counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance another day. The user has now not posted for 30 days. - self.reactor.advance(ONE_DAY_IN_SECONDS) - - # The user is now no longer counted in R30. 
- r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - def test_r30_minimum_usage_using_default_config(self) -> None: - """ - Tests the minimum amount of interaction necessary for the R30 metric - to consider a user 'retained'. - - N.B. This test does not override the `user_ips_max_age` config setting, - which defaults to 28 days. - """ - - # Register a user, log it in, create a room and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the R30 results do not count that user. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Advance 30 days (+ 1 second, because strict inequality causes issues if we are - # bang on 30 days later). - self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1) - - # (Make sure the user isn't somehow counted by this point.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - # Send a message (this counts as activity) - self.helper.send(room_id, "message2", tok=access_token) - - # We have to wait some time for _update_client_ips_batch to get - # called and update the user_ips table. - self.reactor.advance(2 * 60 * 60) - - # *Now* the user is counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance 27 days. The user has now not posted for 27 days. - self.reactor.advance(27 * ONE_DAY_IN_SECONDS) - - # The user is still counted. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - # Advance another day. 
The user has now not posted for 28 days. - self.reactor.advance(ONE_DAY_IN_SECONDS) - - # The user is now no longer counted in R30. - # (This is because the user_ips table has been pruned, which by default - # only preserves the last 28 days of entries.) - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - def test_r30_user_must_be_retained_for_at_least_a_month(self) -> None: - """ - Tests that a newly-registered user must be retained for a whole month - before appearing in the R30 statistic, even if they post every day - during that time! - """ - # Register a user and send a message - user_id = self.register_user("u1", "secret!") - access_token = self.login("u1", "secret!") - room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token) - self.helper.send(room_id, "message", tok=access_token) - - # Check the user does not contribute to R30 yet. - r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 0}) - - for _ in range(30): - # This loop posts a message every day for 30 days - self.reactor.advance(ONE_DAY_IN_SECONDS) - self.helper.send(room_id, "I'm still here", tok=access_token) - - # Notice that the user *still* does not contribute to R30! - r30_results = self.get_success( - self.hs.get_datastores().main.count_r30_users() - ) - self.assertEqual(r30_results, {"all": 0}) - - self.reactor.advance(ONE_DAY_IN_SECONDS) - self.helper.send(room_id, "Still here!", tok=access_token) - - # *Now* the user appears in R30. 
- r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "unknown": 1}) - - class PhoneHomeR30V2TestCase(HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -363,11 +214,6 @@ class PhoneHomeR30V2TestCase(HomeserverTestCase): r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0} ) - # Check that this is a situation where old R30 differs: - # old R30 DOES count this as 'retained'. - r30_results = self.get_success(store.count_r30_users()) - self.assertEqual(r30_results, {"all": 1, "ios": 1}) - # Now we want to check that the user will still be able to appear in # R30v2 as long as the user performs some other activity between # 30 and 60 days later. diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index e40eac2eb0..c9a610db9a 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -16,6 +16,7 @@ import unittest as stdlib_unittest from typing import Any, List, Mapping, Optional import attr +from parameterized import parameterized from synapse.api.constants import EventContentFields from synapse.api.room_versions import RoomVersions @@ -23,6 +24,7 @@ from synapse.events import EventBase, make_event_from_dict from synapse.events.utils import ( PowerLevelsContent, SerializeEventConfig, + _split_field, copy_and_fixup_power_levels_contents, maybe_upsert_event_field, prune_event, @@ -794,3 +796,40 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase): def test_invalid_nesting_raises_type_error(self) -> None: with self.assertRaises(TypeError): copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}}) # type: ignore[dict-item] + + +class SplitFieldTestCase(stdlib_unittest.TestCase): + @parameterized.expand( + [ + # A field with no dots. + ["m", ["m"]], + # Simple dotted fields. + ["m.foo", ["m", "foo"]], + ["m.foo.bar", ["m", "foo", "bar"]], + # Backslash is used as an escape character. 
+ [r"m\.foo", ["m.foo"]], + [r"m\\.foo", ["m\\", "foo"]], + [r"m\\\.foo", [r"m\.foo"]], + [r"m\\\\.foo", ["m\\\\", "foo"]], + [r"m\foo", [r"m\foo"]], + [r"m\\foo", [r"m\foo"]], + [r"m\\\foo", [r"m\\foo"]], + [r"m\\\\foo", [r"m\\foo"]], + # Ensure that escapes at the end don't cause issues. + ["m.foo\\", ["m", "foo\\"]], + ["m.foo\\", ["m", "foo\\"]], + [r"m.foo\.", ["m", "foo."]], + [r"m.foo\\.", ["m", "foo\\", ""]], + [r"m.foo\\\.", ["m", r"foo\."]], + # Empty parts (corresponding to properties which are an empty string) are allowed. + [".m", ["", "m"]], + ["..m", ["", "", "m"]], + ["m.", ["m", ""]], + ["m..", ["m", "", ""]], + ["m..foo", ["m", "", "foo"]], + # Invalid escape sequences. + [r"\m", [r"\m"]], + ] + ) + def test_split_field(self, input: str, expected: str) -> None: + self.assertEqual(_split_field(input), expected) diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 6c7738d810..5c850d1843 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -63,7 +63,7 @@ class FederationServerTests(unittest.FederatingHomeserverTestCase): class ServerACLsTestCase(unittest.TestCase): - def test_blacklisted_server(self) -> None: + def test_blocked_server(self) -> None: e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]}) logging.info("ACL event: %s", e.content) diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py index 3d61b1e8a9..93e5c85a27 100644 --- a/tests/federation/transport/test_client.py +++ b/tests/federation/transport/test_client.py @@ -86,18 +86,7 @@ class SendJoinParserTestCase(TestCase): return parsed_response.members_omitted self.assertTrue(parse({"members_omitted": True})) - self.assertTrue(parse({"org.matrix.msc3706.partial_state": True})) - self.assertFalse(parse({"members_omitted": False})) - self.assertFalse(parse({"org.matrix.msc3706.partial_state": False})) - - # If there's a conflict, 
the stable field wins. - self.assertTrue( - parse({"members_omitted": True, "org.matrix.msc3706.partial_state": False}) - ) - self.assertFalse( - parse({"members_omitted": False, "org.matrix.msc3706.partial_state": True}) - ) def test_servers_in_room(self) -> None: """Check that the servers_in_room field is correctly parsed""" @@ -113,28 +102,10 @@ class SendJoinParserTestCase(TestCase): parsed_response = parser.finish() return parsed_response.servers_in_room - self.assertEqual( - parse({"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}), - ["hs1", "hs2"], - ) self.assertEqual(parse({"servers_in_room": ["example.com"]}), ["example.com"]) - # If both are provided, the stable identifier should win - self.assertEqual( - parse( - { - "org.matrix.msc3706.servers_in_room": ["old"], - "servers_in_room": ["new"], - } - ), - ["new"], - ) - - # And lastly, we should be able to tell if neither field was present. - self.assertEqual( - parse({}), - None, - ) + # We should be able to tell the field is not present. + self.assertEqual(parse({}), None) def test_errors_closing_coroutines(self) -> None: """Check we close all coroutines, even if closing the first raises an Exception. @@ -143,7 +114,7 @@ class SendJoinParserTestCase(TestCase): assertions about its attributes or type. """ parser = SendJoinParser(RoomVersions.V1, False) - response = {"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]} + response = {"servers_in_room": ["hs1", "hs2"]} serialisation = json.dumps(response).encode() # Mock the coroutines managed by this parser. 
diff --git a/tests/handlers/test_sso.py b/tests/handlers/test_sso.py index 620ae3a4ba..b9ffdb4ced 100644 --- a/tests/handlers/test_sso.py +++ b/tests/handlers/test_sso.py @@ -31,7 +31,7 @@ class TestSSOHandler(unittest.HomeserverTestCase): self.http_client.get_file.side_effect = mock_get_file self.http_client.user_agent = b"Synapse Test" hs = self.setup_test_homeserver( - proxied_blacklisted_http_client=self.http_client + proxied_blocklisted_http_client=self.http_client ) return hs diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index eb7f53fee5..105b4caefa 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -269,8 +269,8 @@ class MatrixFederationAgentTests(unittest.TestCase): reactor=cast(ISynapseReactor, self.reactor), tls_client_options_factory=self.tls_factory, user_agent=b"test-agent", # Note that this is unused since _well_known_resolver is provided. - ip_whitelist=IPSet(), - ip_blacklist=IPSet(), + ip_allowlist=IPSet(), + ip_blocklist=IPSet(), _srv_resolver=self.mock_resolver, _well_known_resolver=self.well_known_resolver, ) @@ -997,8 +997,8 @@ class MatrixFederationAgentTests(unittest.TestCase): reactor=self.reactor, tls_client_options_factory=tls_factory, user_agent=b"test-agent", # This is unused since _well_known_resolver is passed below. 
- ip_whitelist=IPSet(), - ip_blacklist=IPSet(), + ip_allowlist=IPSet(), + ip_blocklist=IPSet(), _srv_resolver=self.mock_resolver, _well_known_resolver=WellKnownResolver( cast(ISynapseReactor, self.reactor), diff --git a/tests/http/test_client.py b/tests/http/test_client.py index 57b6a84e23..a05b9f17a6 100644 --- a/tests/http/test_client.py +++ b/tests/http/test_client.py @@ -27,8 +27,8 @@ from twisted.web.iweb import UNKNOWN_LENGTH from synapse.api.errors import SynapseError from synapse.http.client import ( - BlacklistingAgentWrapper, - BlacklistingReactorWrapper, + BlocklistingAgentWrapper, + BlocklistingReactorWrapper, BodyExceededMaxSize, _DiscardBodyWithMaxSizeProtocol, read_body_with_max_size, @@ -140,7 +140,7 @@ class ReadBodyWithMaxSizeTests(TestCase): self.assertEqual(result.getvalue(), b"") -class BlacklistingAgentTest(TestCase): +class BlocklistingAgentTest(TestCase): def setUp(self) -> None: self.reactor, self.clock = get_clock() @@ -157,16 +157,16 @@ class BlacklistingAgentTest(TestCase): self.reactor.lookups[domain.decode()] = ip.decode() self.reactor.lookups[ip.decode()] = ip.decode() - self.ip_whitelist = IPSet([self.allowed_ip.decode()]) - self.ip_blacklist = IPSet(["5.0.0.0/8"]) + self.ip_allowlist = IPSet([self.allowed_ip.decode()]) + self.ip_blocklist = IPSet(["5.0.0.0/8"]) def test_reactor(self) -> None: - """Apply the blacklisting reactor and ensure it properly blocks connections to particular domains and IPs.""" + """Apply the blocklisting reactor and ensure it properly blocks connections to particular domains and IPs.""" agent = Agent( - BlacklistingReactorWrapper( + BlocklistingReactorWrapper( self.reactor, - ip_whitelist=self.ip_whitelist, - ip_blacklist=self.ip_blacklist, + ip_allowlist=self.ip_allowlist, + ip_blocklist=self.ip_blocklist, ), ) @@ -207,11 +207,11 @@ class BlacklistingAgentTest(TestCase): self.assertEqual(response.code, 200) def test_agent(self) -> None: - """Apply the blacklisting agent and ensure it properly blocks 
connections to particular IPs.""" - agent = BlacklistingAgentWrapper( + """Apply the blocklisting agent and ensure it properly blocks connections to particular IPs.""" + agent = BlocklistingAgentWrapper( Agent(self.reactor), - ip_blacklist=self.ip_blacklist, - ip_whitelist=self.ip_whitelist, + ip_blocklist=self.ip_blocklist, + ip_allowlist=self.ip_allowlist, ) # The unsafe IPs should be rejected. diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py index d89a91c59d..0dfc03ce50 100644 --- a/tests/http/test_matrixfederationclient.py +++ b/tests/http/test_matrixfederationclient.py @@ -231,11 +231,11 @@ class FederationClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, ResponseNeverReceived) - def test_client_ip_range_blacklist(self) -> None: - """Ensure that Synapse does not try to connect to blacklisted IPs""" + def test_client_ip_range_blocklist(self) -> None: + """Ensure that Synapse does not try to connect to blocked IPs""" - # Set up the ip_range blacklist - self.hs.config.server.federation_ip_range_blacklist = IPSet( + # Set up the ip_range blocklist + self.hs.config.server.federation_ip_range_blocklist = IPSet( ["127.0.0.0/8", "fe80::/64"] ) self.reactor.lookups["internal"] = "127.0.0.1" @@ -243,7 +243,7 @@ class FederationClientTests(HomeserverTestCase): self.reactor.lookups["fine"] = "10.20.30.40" cl = MatrixFederationHttpClient(self.hs, None) - # Try making a GET request to a blacklisted IPv4 address + # Try making a GET request to a blocked IPv4 address # ------------------------------------------------------ # Make the request d = defer.ensureDeferred(cl.get_json("internal:8008", "foo/bar", timeout=10000)) @@ -261,7 +261,7 @@ class FederationClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, DNSLookupError) - # Try making a POST request to a blacklisted 
IPv6 address + # Try making a POST request to a blocked IPv6 address # ------------------------------------------------------- # Make the request d = defer.ensureDeferred( @@ -278,11 +278,11 @@ class FederationClientTests(HomeserverTestCase): clients = self.reactor.tcpClients self.assertEqual(len(clients), 0) - # Check that it was due to a blacklisted DNS lookup + # Check that it was due to a blocked DNS lookup f = self.failureResultOf(d, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, DNSLookupError) - # Try making a GET request to a non-blacklisted IPv4 address + # Try making a GET request to an allowed IPv4 address # ---------------------------------------------------------- # Make the request d = defer.ensureDeferred(cl.post_json("fine:8008", "foo/bar", timeout=10000)) diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index cc175052ac..e0ae5a88ff 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -32,7 +32,7 @@ from twisted.internet.protocol import Factory, Protocol from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol from twisted.web.http import HTTPChannel -from synapse.http.client import BlacklistingReactorWrapper +from synapse.http.client import BlocklistingReactorWrapper from synapse.http.connectproxyclient import ProxyCredentials from synapse.http.proxyagent import ProxyAgent, parse_proxy @@ -684,11 +684,11 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(body, b"result") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) - def test_http_request_via_proxy_with_blacklist(self) -> None: - # The blacklist includes the configured proxy IP. + def test_http_request_via_proxy_with_blocklist(self) -> None: + # The blocklist includes the configured proxy IP. 
agent = ProxyAgent( - BlacklistingReactorWrapper( - self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) + BlocklistingReactorWrapper( + self.reactor, ip_allowlist=None, ip_blocklist=IPSet(["1.0.0.0/8"]) ), self.reactor, use_proxy=True, @@ -730,11 +730,11 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(body, b"result") @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"}) - def test_https_request_via_uppercase_proxy_with_blacklist(self) -> None: - # The blacklist includes the configured proxy IP. + def test_https_request_via_uppercase_proxy_with_blocklist(self) -> None: + # The blocklist includes the configured proxy IP. agent = ProxyAgent( - BlacklistingReactorWrapper( - self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) + BlocklistingReactorWrapper( + self.reactor, ip_allowlist=None, ip_blocklist=IPSet(["1.0.0.0/8"]) ), self.reactor, contextFactory=get_test_https_policy(), diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py index 010601da4b..be731645bf 100644 --- a/tests/http/test_simple_client.py +++ b/tests/http/test_simple_client.py @@ -123,17 +123,17 @@ class SimpleHttpClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestTimedOutError) - def test_client_ip_range_blacklist(self) -> None: - """Ensure that Synapse does not try to connect to blacklisted IPs""" + def test_client_ip_range_blocklist(self) -> None: + """Ensure that Synapse does not try to connect to blocked IPs""" - # Add some DNS entries we'll blacklist + # Add some DNS entries we'll block self.reactor.lookups["internal"] = "127.0.0.1" self.reactor.lookups["internalv6"] = "fe80:0:0:0:0:8a2e:370:7337" - ip_blacklist = IPSet(["127.0.0.0/8", "fe80::/64"]) + ip_blocklist = IPSet(["127.0.0.0/8", "fe80::/64"]) - cl = SimpleHttpClient(self.hs, ip_blacklist=ip_blacklist) + cl = SimpleHttpClient(self.hs, ip_blocklist=ip_blocklist) - # Try making a GET request to a blacklisted IPv4 address + # Try making a GET request 
to a blocked IPv4 address # ------------------------------------------------------ # Make the request d = defer.ensureDeferred(cl.get_json("http://internal:8008/foo/bar")) @@ -145,7 +145,7 @@ class SimpleHttpClientTests(HomeserverTestCase): self.failureResultOf(d, DNSLookupError) - # Try making a POST request to a blacklisted IPv6 address + # Try making a POST request to a blocked IPv6 address # ------------------------------------------------------- # Make the request d = defer.ensureDeferred( @@ -159,10 +159,10 @@ class SimpleHttpClientTests(HomeserverTestCase): clients = self.reactor.tcpClients self.assertEqual(len(clients), 0) - # Check that it was due to a blacklisted DNS lookup + # Check that it was due to a blocked DNS lookup self.failureResultOf(d, DNSLookupError) - # Try making a GET request to a non-blacklisted IPv4 address + # Try making a GET request to a non-blocked IPv4 address # ---------------------------------------------------------- # Make the request d = defer.ensureDeferred(cl.get_json("http://testserv:8008/foo/bar")) diff --git a/tests/media/test_url_previewer.py b/tests/media/test_url_previewer.py new file mode 100644 index 0000000000..3c4c7d6765 --- /dev/null +++ b/tests/media/test_url_previewer.py @@ -0,0 +1,113 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.server import HomeServer +from synapse.util import Clock + +from tests import unittest +from tests.unittest import override_config + +try: + import lxml +except ImportError: + lxml = None + + +class URLPreviewTests(unittest.HomeserverTestCase): + if not lxml: + skip = "url preview feature requires lxml" + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + config = self.default_config() + config["url_preview_enabled"] = True + config["max_spider_size"] = 9999999 + config["url_preview_ip_range_blacklist"] = ( + "192.168.1.1", + "1.0.0.0/8", + "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "2001:800::/21", + ) + + self.storage_path = self.mktemp() + self.media_store_path = self.mktemp() + os.mkdir(self.storage_path) + os.mkdir(self.media_store_path) + config["media_store_path"] = self.media_store_path + + provider_config = { + "module": "synapse.media.storage_provider.FileStorageProviderBackend", + "store_local": True, + "store_synchronous": False, + "store_remote": True, + "config": {"directory": self.storage_path}, + } + + config["media_storage_providers"] = [provider_config] + + return self.setup_test_homeserver(config=config) + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + media_repo_resource = hs.get_media_repository_resource() + preview_url = media_repo_resource.children[b"preview_url"] + self.url_previewer = preview_url._url_previewer + + def test_all_urls_allowed(self) -> None: + self.assertFalse(self.url_previewer._is_url_blocked("http://matrix.org")) + self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org")) + self.assertFalse(self.url_previewer._is_url_blocked("http://localhost:8000")) + self.assertFalse( + self.url_previewer._is_url_blocked("http://user:pass@matrix.org") + ) + + @override_config( + { + "url_preview_url_blacklist": [ + {"username": "user"}, + {"scheme": "http", "netloc": 
"matrix.org"}, + ] + } + ) + def test_blocked_url(self) -> None: + # Blocked via scheme and URL. + self.assertTrue(self.url_previewer._is_url_blocked("http://matrix.org")) + # Not blocked because all components must match. + self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org")) + + # Blocked due to the user. + self.assertTrue( + self.url_previewer._is_url_blocked("http://user:pass@example.com") + ) + self.assertTrue(self.url_previewer._is_url_blocked("http://user@example.com")) + + @override_config({"url_preview_url_blacklist": [{"netloc": "*.example.com"}]}) + def test_glob_blocked_url(self) -> None: + # All subdomains are blocked. + self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com")) + self.assertTrue(self.url_previewer._is_url_blocked("http://.example.com")) + + # The TLD is not blocked. + self.assertFalse(self.url_previewer._is_url_blocked("https://example.com")) + + @override_config({"url_preview_url_blacklist": [{"netloc": "^.+\\.example\\.com"}]}) + def test_regex_blocked_urL(self) -> None: + # All subdomains are blocked. + self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com")) + # Requires a non-empty subdomain. + self.assertFalse(self.url_previewer._is_url_blocked("http://.example.com")) + + # The TLD is not blocked. 
+ self.assertFalse(self.url_previewer._is_url_blocked("https://example.com")) diff --git a/tests/push/test_http.py b/tests/push/test_http.py index 54f558742d..e68a979ee0 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -52,7 +52,7 @@ class HTTPPusherTests(HomeserverTestCase): m.post_json_get_json = post_json_get_json - hs = self.setup_test_homeserver(proxied_blacklisted_http_client=m) + hs = self.setup_test_homeserver(proxied_blocklisted_http_client=m) return hs diff --git a/tests/replication/slave/storage/__init__.py b/tests/replication/slave/storage/__init__.py deleted file mode 100644 index f43a360a80..0000000000 --- a/tests/replication/slave/storage/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/tests/replication/slave/__init__.py b/tests/replication/storage/__init__.py index f43a360a80..f43a360a80 100644 --- a/tests/replication/slave/__init__.py +++ b/tests/replication/storage/__init__.py diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/storage/_base.py index 4c9b494344..de26a62ae1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/storage/_base.py @@ -24,7 +24,7 @@ from synapse.util import Clock from tests.replication._base import BaseStreamTestCase -class BaseSlavedStoreTestCase(BaseStreamTestCase): +class BaseWorkerStoreTestCase(BaseStreamTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: return self.setup_test_homeserver(federation_client=Mock()) @@ -34,7 +34,7 @@ class BaseSlavedStoreTestCase(BaseStreamTestCase): self.reconnect() self.master_store = hs.get_datastores().main - self.slaved_store = self.worker_hs.get_datastores().main + self.worker_store = self.worker_hs.get_datastores().main persistence = hs.get_storage_controllers().persistence assert persistence is not None self.persistance = persistence @@ -50,7 +50,7 @@ class BaseSlavedStoreTestCase(BaseStreamTestCase): self, method: str, args: Iterable[Any], expected_result: Optional[Any] = None ) -> None: master_result = self.get_success(getattr(self.master_store, method)(*args)) - slaved_result = self.get_success(getattr(self.slaved_store, method)(*args)) + worker_result = self.get_success(getattr(self.worker_store, method)(*args)) if expected_result is not None: self.assertEqual( master_result, @@ -59,14 +59,14 @@ class BaseSlavedStoreTestCase(BaseStreamTestCase): % (expected_result, master_result), ) self.assertEqual( - slaved_result, + worker_result, expected_result, - "Expected slave result to be %r but was %r" - % (expected_result, slaved_result), + "Expected worker result to be %r but was %r" + % (expected_result, worker_result), ) self.assertEqual( master_result, - slaved_result, - 
"Slave result %r does not match master result %r" - % (slaved_result, master_result), + worker_result, + "Worker result %r does not match master result %r" + % (worker_result, master_result), ) diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/storage/test_events.py index b2125b1fea..f7c6417a09 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/storage/test_events.py @@ -36,7 +36,7 @@ from synapse.util import Clock from tests.server import FakeTransport -from ._base import BaseSlavedStoreTestCase +from ._base import BaseWorkerStoreTestCase USER_ID = "@feeling:test" USER_ID_2 = "@bright:test" @@ -63,7 +63,7 @@ def patch__eq__(cls: object) -> Callable[[], None]: return unpatch -class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase): +class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase): STORE_TYPE = EventsWorkerStore def setUp(self) -> None: @@ -294,7 +294,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase): assert j2.internal_metadata.stream_ordering is not None event_source = RoomEventSource(self.hs) - event_source.store = self.slaved_store + event_source.store = self.worker_store current_token = event_source.get_current_key() # gradually stream out the replication @@ -310,12 +310,12 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase): # # First, we get a list of the rooms we are joined to joined_rooms = self.get_success( - self.slaved_store.get_rooms_for_user_with_stream_ordering(USER_ID_2) + self.worker_store.get_rooms_for_user_with_stream_ordering(USER_ID_2) ) # Then, we get a list of the events since the last sync membership_changes = self.get_success( - self.slaved_store.get_membership_changes_for_user( + self.worker_store.get_membership_changes_for_user( USER_ID_2, prev_token, current_token ) ) diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py index dcb3e6669b..875811669c 100644 --- a/tests/replication/test_pusher_shard.py +++ 
b/tests/replication/test_pusher_shard.py @@ -93,7 +93,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase): self.make_worker_hs( "synapse.app.generic_worker", {"worker_name": "pusher1", "pusher_instances": ["pusher1"]}, - proxied_blacklisted_http_client=http_client_mock, + proxied_blocklisted_http_client=http_client_mock, ) event_id = self._create_pusher_and_send_msg("user") @@ -126,7 +126,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase): "worker_name": "pusher1", "pusher_instances": ["pusher1", "pusher2"], }, - proxied_blacklisted_http_client=http_client_mock1, + proxied_blocklisted_http_client=http_client_mock1, ) http_client_mock2 = Mock(spec_set=["post_json_get_json"]) @@ -140,7 +140,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase): "worker_name": "pusher2", "pusher_instances": ["pusher1", "pusher2"], }, - proxied_blacklisted_http_client=http_client_mock2, + proxied_blocklisted_http_client=http_client_mock2, ) # We choose a user name that we know should go to pusher1. 
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index 62acf4f44e..dc32982e22 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -42,7 +42,7 @@ from tests.test_utils.html_parsers import TestHtmlParser from tests.unittest import HomeserverTestCase, override_config, skip_unless try: - from authlib.jose import jwk, jwt + from authlib.jose import JsonWebKey, jwt HAS_JWT = True except ImportError: @@ -1054,6 +1054,22 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual(channel.json_body["error"], "Token field for JWT is missing") + def test_deactivated_user(self) -> None: + """Logging in as a deactivated account should error.""" + user_id = self.register_user("kermit", "monkey") + self.get_success( + self.hs.get_deactivate_account_handler().deactivate_account( + user_id, erase_data=False, requester=create_requester(user_id) + ) + ) + + channel = self.jwt_login({"sub": "kermit"}) + self.assertEqual(channel.code, 403, msg=channel.result) + self.assertEqual(channel.json_body["errcode"], "M_USER_DEACTIVATED") + self.assertEqual( + channel.json_body["error"], "This account has been deactivated" + ) + # The JWTPubKeyTestCase is a complement to JWTTestCase where we instead use # RSS256, with a public key configured in synapse as "jwt_secret", and tokens @@ -1121,7 +1137,7 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase): def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str: header = {"alg": "RS256"} if secret.startswith("-----BEGIN RSA PRIVATE KEY-----"): - secret = jwk.dumps(secret, kty="RSA") + secret = JsonWebKey.import_key(secret, {"kty": "RSA"}) result: bytes = jwt.encode(header, payload, secret) return result.decode("ascii") diff --git a/tests/rest/client/test_mutual_rooms.py b/tests/rest/client/test_mutual_rooms.py index a4327f7ace..22fddbd6d6 100644 --- a/tests/rest/client/test_mutual_rooms.py 
+++ b/tests/rest/client/test_mutual_rooms.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from urllib.parse import quote + from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin @@ -44,8 +46,8 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase): def _get_mutual_rooms(self, token: str, other_user: str) -> FakeChannel: return self.make_request( "GET", - "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms/%s" - % other_user, + "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms" + f"?user_id={quote(other_user)}", access_token=token, ) diff --git a/tests/rest/client/test_read_marker.py b/tests/rest/client/test_read_marker.py new file mode 100644 index 0000000000..0eedcdb476 --- /dev/null +++ b/tests/rest/client/test_read_marker.py @@ -0,0 +1,147 @@ +# Copyright 2023 Beeper +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from twisted.test.proto_helpers import MemoryReactor + +import synapse.rest.admin +from synapse.api.constants import EventTypes +from synapse.rest import admin +from synapse.rest.client import login, read_marker, register, room +from synapse.server import HomeServer +from synapse.util import Clock + +from tests import unittest + +ONE_HOUR_MS = 3600000 +ONE_DAY_MS = ONE_HOUR_MS * 24 + + +class ReadMarkerTestCase(unittest.HomeserverTestCase): + servlets = [ + login.register_servlets, + register.register_servlets, + read_marker.register_servlets, + room.register_servlets, + synapse.rest.admin.register_servlets, + admin.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + config = self.default_config() + + # merge this default retention config with anything that was specified in + # @override_config + retention_config = { + "enabled": True, + "allowed_lifetime_min": ONE_DAY_MS, + "allowed_lifetime_max": ONE_DAY_MS * 3, + } + retention_config.update(config.get("retention", {})) + config["retention"] = retention_config + + self.hs = self.setup_test_homeserver(config=config) + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.owner = self.register_user("owner", "pass") + self.owner_tok = self.login("owner", "pass") + self.store = self.hs.get_datastores().main + self.clock = self.hs.get_clock() + + def test_send_read_marker(self) -> None: + room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok) + + def send_message() -> str: + res = self.helper.send(room_id=room_id, body="1", tok=self.owner_tok) + return res["event_id"] + + # Test setting the read marker on the room + event_id_1 = send_message() + + channel = self.make_request( + "POST", + "/rooms/!abc:beep/read_markers", + content={ + "m.fully_read": event_id_1, + }, + access_token=self.owner_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Test moving the read marker to a 
newer event + event_id_2 = send_message() + channel = self.make_request( + "POST", + "/rooms/!abc:beep/read_markers", + content={ + "m.fully_read": event_id_2, + }, + access_token=self.owner_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + def test_send_read_marker_missing_previous_event(self) -> None: + """ + Test moving a read marker from an event that previously existed but was + later removed due to retention rules. + """ + + room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok) + + # Set retention rule on the room so we remove old events to test this case + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.Retention, + body={"max_lifetime": ONE_DAY_MS}, + tok=self.owner_tok, + ) + + def send_message() -> str: + res = self.helper.send(room_id=room_id, body="1", tok=self.owner_tok) + return res["event_id"] + + # Test setting the read marker on the room + event_id_1 = send_message() + + channel = self.make_request( + "POST", + "/rooms/!abc:beep/read_markers", + content={ + "m.fully_read": event_id_1, + }, + access_token=self.owner_tok, + ) + + # Send a second message (retention will not remove the latest event ever) + send_message() + # And then advance so retention rules remove the first event (where the marker is) + self.reactor.advance(ONE_DAY_MS * 2 / 1000) + + event = self.get_success(self.store.get_event(event_id_1, allow_none=True)) + assert event is None + + # TODO See https://github.com/matrix-org/synapse/issues/13476 + self.store.get_event_ordering.invalidate_all() + + # Test moving the read marker to a newer event + event_id_2 = send_message() + channel = self.make_request( + "POST", + "/rooms/!abc:beep/read_markers", + content={ + "m.fully_read": event_id_2, + }, + access_token=self.owner_tok, + ) + self.assertEqual(channel.code, 200, channel.result) diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py index e44beae8c1..170fb0534a 100644 --- 
a/tests/rest/media/test_url_preview.py +++ b/tests/rest/media/test_url_preview.py @@ -418,9 +418,9 @@ class URLPreviewTests(unittest.HomeserverTestCase): channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} ) - def test_blacklisted_ip_specific(self) -> None: + def test_blocked_ip_specific(self) -> None: """ - Blacklisted IP addresses, found via DNS, are not spidered. + Blocked IP addresses, found via DNS, are not spidered. """ self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")] @@ -439,9 +439,9 @@ class URLPreviewTests(unittest.HomeserverTestCase): }, ) - def test_blacklisted_ip_range(self) -> None: + def test_blocked_ip_range(self) -> None: """ - Blacklisted IP ranges, IPs found over DNS, are not spidered. + Blocked IP ranges, IPs found over DNS, are not spidered. """ self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")] @@ -458,9 +458,9 @@ class URLPreviewTests(unittest.HomeserverTestCase): }, ) - def test_blacklisted_ip_specific_direct(self) -> None: + def test_blocked_ip_specific_direct(self) -> None: """ - Blacklisted IP addresses, accessed directly, are not spidered. + Blocked IP addresses, accessed directly, are not spidered. """ channel = self.make_request( "GET", "preview_url?url=http://192.168.1.1", shorthand=False @@ -470,16 +470,13 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual(len(self.reactor.tcpClients), 0) self.assertEqual( channel.json_body, - { - "errcode": "M_UNKNOWN", - "error": "IP address blocked by IP blacklist entry", - }, + {"errcode": "M_UNKNOWN", "error": "IP address blocked"}, ) self.assertEqual(channel.code, 403) - def test_blacklisted_ip_range_direct(self) -> None: + def test_blocked_ip_range_direct(self) -> None: """ - Blacklisted IP ranges, accessed directly, are not spidered. + Blocked IP ranges, accessed directly, are not spidered. 
""" channel = self.make_request( "GET", "preview_url?url=http://1.1.1.2", shorthand=False @@ -488,15 +485,12 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 403) self.assertEqual( channel.json_body, - { - "errcode": "M_UNKNOWN", - "error": "IP address blocked by IP blacklist entry", - }, + {"errcode": "M_UNKNOWN", "error": "IP address blocked"}, ) - def test_blacklisted_ip_range_whitelisted_ip(self) -> None: + def test_blocked_ip_range_whitelisted_ip(self) -> None: """ - Blacklisted but then subsequently whitelisted IP addresses can be + Blocked but then subsequently whitelisted IP addresses can be spidered. """ self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")] @@ -527,10 +521,10 @@ class URLPreviewTests(unittest.HomeserverTestCase): channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} ) - def test_blacklisted_ip_with_external_ip(self) -> None: + def test_blocked_ip_with_external_ip(self) -> None: """ - If a hostname resolves a blacklisted IP, even if there's a - non-blacklisted one, it will be rejected. + If a hostname resolves a blocked IP, even if there's a non-blocked one, + it will be rejected. """ # Hardcode the URL resolving to the IP we want. self.lookups["example.com"] = [ @@ -550,9 +544,9 @@ class URLPreviewTests(unittest.HomeserverTestCase): }, ) - def test_blacklisted_ipv6_specific(self) -> None: + def test_blocked_ipv6_specific(self) -> None: """ - Blacklisted IP addresses, found via DNS, are not spidered. + Blocked IP addresses, found via DNS, are not spidered. """ self.lookups["example.com"] = [ (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff") @@ -573,9 +567,9 @@ class URLPreviewTests(unittest.HomeserverTestCase): }, ) - def test_blacklisted_ipv6_range(self) -> None: + def test_blocked_ipv6_range(self) -> None: """ - Blacklisted IP ranges, IPs found over DNS, are not spidered. + Blocked IP ranges, IPs found over DNS, are not spidered. 
""" self.lookups["example.com"] = [(IPv6Address, "2001:800::1")] @@ -653,6 +647,57 @@ class URLPreviewTests(unittest.HomeserverTestCase): server.data, ) + def test_image(self) -> None: + """An image should be precached if mentioned in the HTML.""" + self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] + self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")] + + result = ( + b"""<html><body><img src="http://cdn.matrix.org/foo.png"></body></html>""" + ) + + channel = self.make_request( + "GET", + "preview_url?url=http://matrix.org", + shorthand=False, + await_result=False, + ) + self.pump() + + # Respond with the HTML. + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="utf8"\r\n\r\n' + ) + % (len(result),) + + result + ) + self.pump() + + # Respond with the photo. + client = self.reactor.tcpClients[1][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b"Content-Type: image/png\r\n\r\n" + ) + % (len(SMALL_PNG),) + + SMALL_PNG + ) + self.pump() + + # The image should be in the result. + self.assertEqual(channel.code, 200) + self._assert_small_png(channel.json_body) + def test_nonexistent_image(self) -> None: """If the preview image doesn't exist, ensure some data is returned.""" self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] @@ -683,9 +728,53 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) self.pump() + + # There should not be a second connection. + self.assertEqual(len(self.reactor.tcpClients), 1) + + # The image should not be in the result. 
self.assertEqual(channel.code, 200) + self.assertNotIn("og:image", channel.json_body) + + @unittest.override_config( + {"url_preview_url_blacklist": [{"netloc": "cdn.matrix.org"}]} + ) + def test_image_blocked(self) -> None: + """If the preview image doesn't exist, ensure some data is returned.""" + self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] + self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")] + + result = ( + b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>""" + ) + + channel = self.make_request( + "GET", + "preview_url?url=http://matrix.org", + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="utf8"\r\n\r\n' + ) + % (len(result),) + + result + ) + self.pump() + + # There should not be a second connection. + self.assertEqual(len(self.reactor.tcpClients), 1) # The image should not be in the result. + self.assertEqual(channel.code, 200) self.assertNotIn("og:image", channel.json_body) def test_oembed_failure(self) -> None: @@ -880,6 +969,11 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) self.pump() + + # Double check that the proper host is being connected to. (Note that + # twitter.com can't be resolved so this is already implicitly checked.) 
+ self.assertIn(b"\r\nHost: publish.twitter.com\r\n", server.data) + self.assertEqual(channel.code, 200) body = channel.json_body self.assertEqual( @@ -940,6 +1034,22 @@ class URLPreviewTests(unittest.HomeserverTestCase): }, ) + @unittest.override_config( + {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]} + ) + def test_oembed_blocked(self) -> None: + """The oEmbed URL should not be downloaded if the oEmbed URL is blocked.""" + self.lookups["twitter.com"] = [(IPv4Address, "10.1.2.3")] + + channel = self.make_request( + "GET", + "preview_url?url=http://twitter.com/matrixdotorg/status/12345", + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual(channel.code, 403, channel.result) + def test_oembed_autodiscovery(self) -> None: """ Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL. @@ -980,7 +1090,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): % (len(result),) + result ) - self.pump() # The oEmbed response. @@ -1004,7 +1113,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): % (len(oembed_content),) + oembed_content ) - self.pump() # Ensure the URL is what was requested. @@ -1023,7 +1131,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): % (len(SMALL_PNG),) + SMALL_PNG ) - self.pump() # Ensure the URL is what was requested. @@ -1036,6 +1143,59 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) self._assert_small_png(body) + @unittest.override_config( + {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]} + ) + def test_oembed_autodiscovery_blocked(self) -> None: + """ + If the discovered oEmbed URL is blocked, it should be discarded. + """ + # This is a little cheesy in that we use the www subdomain (which isn't the + # list of oEmbed patterns) to get "raw" HTML response. 
+ self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")] + self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.4")] + + result = b""" + <title>Test</title> + <link rel="alternate" type="application/json+oembed" + href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json" + title="matrixdotorg" /> + """ + + channel = self.make_request( + "GET", + "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345", + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="utf8"\r\n\r\n' + ) + % (len(result),) + + result + ) + + self.pump() + + # Ensure there's no additional connections. + self.assertEqual(len(self.reactor.tcpClients), 1) + + # Ensure the URL is what was requested. + self.assertIn(b"\r\nHost: www.twitter.com\r\n", server.data) + + self.assertEqual(channel.code, 200) + body = channel.json_body + self.assertEqual(body["og:title"], "Test") + self.assertNotIn("og:image", body) + def _download_image(self) -> Tuple[str, str]: """Downloads an image into the URL cache. Returns: @@ -1192,8 +1352,8 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]}) - def test_blacklist_port(self) -> None: - """Tests that blacklisting URLs with a port makes previewing such URLs + def test_blocked_port(self) -> None: + """Tests that blocking URLs with a port makes previewing such URLs fail with a 403 error and doesn't impact other previews. 
""" self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] @@ -1230,3 +1390,23 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.pump() self.assertEqual(channel.code, 200) + + @unittest.override_config( + {"url_preview_url_blacklist": [{"netloc": "example.com"}]} + ) + def test_blocked_url(self) -> None: + """Tests that blocking URLs with a host makes previewing such URLs + fail with a 403 error. + """ + self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")] + + bad_url = quote("http://example.com/foo") + + channel = self.make_request( + "GET", + "preview_url?url=" + bad_url, + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual(channel.code, 403, channel.result) diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py index 6ec34997ea..f9cf0fcb82 100644 --- a/tests/storage/test_profile.py +++ b/tests/storage/test_profile.py @@ -14,6 +14,8 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.server import HomeServer +from synapse.storage.database import LoggingTransaction +from synapse.storage.engines import PostgresEngine from synapse.types import UserID from synapse.util import Clock @@ -69,3 +71,64 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase): self.assertIsNone( self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart)) ) + + def test_profiles_bg_migration(self) -> None: + """ + Test background job that copies entries from column user_id to full_user_id, adding + the hostname in the process. 
+ """ + updater = self.hs.get_datastores().main.db_pool.updates + + # drop the constraint so we can insert nulls in full_user_id to populate the test + if isinstance(self.store.database_engine, PostgresEngine): + + def f(txn: LoggingTransaction) -> None: + txn.execute( + "ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null" + ) + + self.get_success(self.store.db_pool.runInteraction("", f)) + + for i in range(0, 70): + self.get_success( + self.store.db_pool.simple_insert( + "profiles", + {"user_id": f"hello{i:02}"}, + ) + ) + + # re-add the constraint so that when it's validated it actually exists + if isinstance(self.store.database_engine, PostgresEngine): + + def f(txn: LoggingTransaction) -> None: + txn.execute( + "ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID" + ) + + self.get_success(self.store.db_pool.runInteraction("", f)) + + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + values={ + "update_name": "populate_full_user_id_profiles", + "progress_json": "{}", + }, + ) + ) + + self.get_success( + updater.run_background_updates(False), + ) + + expected_values = [] + for i in range(0, 70): + expected_values.append((f"@hello{i:02}:{self.hs.hostname}",)) + + res = self.get_success( + self.store.db_pool.execute( + "", None, "SELECT full_user_id from profiles ORDER BY full_user_id" + ) + ) + self.assertEqual(len(res), len(expected_values)) + self.assertEqual(res, expected_values) diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py new file mode 100644 index 0000000000..bab802f56e --- /dev/null +++ b/tests/storage/test_user_filters.py @@ -0,0 +1,94 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.server import HomeServer +from synapse.storage.database import LoggingTransaction +from synapse.storage.engines import PostgresEngine +from synapse.util import Clock + +from tests import unittest + + +class UserFiltersStoreTestCase(unittest.HomeserverTestCase): + """ + Test background migration that copies entries from column user_id to full_user_id, adding + the hostname in the process. + """ + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + + def test_bg_migration(self) -> None: + updater = self.hs.get_datastores().main.db_pool.updates + + # drop the constraint so we can insert nulls in full_user_id to populate the test + if isinstance(self.store.database_engine, PostgresEngine): + + def f(txn: LoggingTransaction) -> None: + txn.execute( + "ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null" + ) + + self.get_success(self.store.db_pool.runInteraction("", f)) + + for i in range(0, 70): + self.get_success( + self.store.db_pool.simple_insert( + "user_filters", + { + "user_id": f"hello{i:02}", + "filter_id": i, + "filter_json": bytearray(i), + }, + ) + ) + + # re-add the constraint so that when it's validated it actually exists + if isinstance(self.store.database_engine, PostgresEngine): + + def f(txn: LoggingTransaction) -> None: + txn.execute( + "ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID" + ) + + 
self.get_success(self.store.db_pool.runInteraction("", f)) + + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + values={ + "update_name": "populate_full_user_id_user_filters", + "progress_json": "{}", + }, + ) + ) + + self.get_success( + updater.run_background_updates(False), + ) + + expected_values = [] + for i in range(0, 70): + expected_values.append((f"@hello{i:02}:{self.hs.hostname}",)) + + res = self.get_success( + self.store.db_pool.execute( + "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id" + ) + ) + self.assertEqual(len(res), len(expected_values)) + self.assertEqual(res, expected_values) diff --git a/tests/test_state.py b/tests/test_state.py index 2029d3d60a..ddf59916b1 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -264,7 +264,7 @@ class StateTestCase(unittest.TestCase): self.dummy_store.register_events(graph.walk()) - context_store: dict[str, EventContext] = {} + context_store: Dict[str, EventContext] = {} for event in graph.walk(): context = yield defer.ensureDeferred( diff --git a/tests/test_utils/logging_setup.py b/tests/test_utils/logging_setup.py index b522163a34..c37f205ed0 100644 --- a/tests/test_utils/logging_setup.py +++ b/tests/test_utils/logging_setup.py @@ -40,10 +40,9 @@ def setup_logging() -> None: """ root_logger = logging.getLogger() - log_format = ( - "%(asctime)s - %(name)s - %(lineno)d - " - "%(levelname)s - %(request)s - %(message)s" - ) + # We exclude `%(asctime)s` from this format because the Twisted logger adds its own + # timestamp + log_format = "%(name)s - %(lineno)d - " "%(levelname)s - %(request)s - %(message)s" handler = ToTwistedHandler() formatter = logging.Formatter(log_format) diff --git a/tests/unittest.py b/tests/unittest.py index b6fdf69635..c73195b32b 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +import functools import gc import hashlib import hmac @@ -150,7 +151,11 @@ def deepcopy_config(config: _TConfig) -> _TConfig: return new_config -_make_homeserver_config_obj_cache: Dict[str, Union[RootConfig, Config]] = {} +@functools.lru_cache(maxsize=8) +def _parse_config_dict(config: str) -> RootConfig: + config_obj = HomeServerConfig() + config_obj.parse_config_dict(json.loads(config), "", "") + return config_obj def make_homeserver_config_obj(config: Dict[str, Any]) -> RootConfig: @@ -164,21 +169,7 @@ def make_homeserver_config_obj(config: Dict[str, Any]) -> RootConfig: but it keeps a cache of `HomeServerConfig` instances and deepcopies them as needed, to avoid validating the whole configuration every time. """ - cache_key = json.dumps(config) - - if cache_key in _make_homeserver_config_obj_cache: - # Cache hit: reuse the existing instance - config_obj = _make_homeserver_config_obj_cache[cache_key] - else: - # Cache miss; create the actual instance - config_obj = HomeServerConfig() - config_obj.parse_config_dict(config, "", "") - - # Add to the cache - _make_homeserver_config_obj_cache[cache_key] = config_obj - - assert isinstance(config_obj, RootConfig) - + config_obj = _parse_config_dict(json.dumps(config, sort_keys=True)) return deepcopy_config(config_obj) @@ -229,13 +220,20 @@ class TestCase(unittest.TestCase): # # The easiest way to do this would be to do a full GC after each test # run, but that is very expensive. Instead, we disable GC (above) for - # the duration of the test so that we only need to run a gen-0 GC, which - # is a lot quicker. + # the duration of the test and only run a gen-0 GC, which is a lot + # quicker. This doesn't clean up everything, since the TestCase + # instance still holds references to objects created during the test, + # such as HomeServers, so we do a full GC every so often. 
@around(self) def tearDown(orig: Callable[[], R]) -> R: ret = orig() gc.collect(0) + # Run a full GC every 50 gen-0 GCs. + gen0_stats = gc.get_stats()[0] + gen0_collections = gen0_stats["collections"] + if gen0_collections % 50 == 0: + gc.collect() gc.enable() set_current_context(SENTINEL_CONTEXT) |