diff options
187 files changed, 6110 insertions, 2961 deletions
diff --git a/.buildkite/.env b/.buildkite/.env new file mode 100644 index 0000000000..85b102d07f --- /dev/null +++ b/.buildkite/.env @@ -0,0 +1,13 @@ +CI +BUILDKITE +BUILDKITE_BUILD_NUMBER +BUILDKITE_BRANCH +BUILDKITE_BUILD_NUMBER +BUILDKITE_JOB_ID +BUILDKITE_BUILD_URL +BUILDKITE_PROJECT_SLUG +BUILDKITE_COMMIT +BUILDKITE_PULL_REQUEST +BUILDKITE_TAG +CODECOV_TOKEN +TRIAL_FLAGS diff --git a/.buildkite/docker-compose.py27.pg94.yaml b/.buildkite/docker-compose.py27.pg94.yaml new file mode 100644 index 0000000000..2d4b9eadd9 --- /dev/null +++ b/.buildkite/docker-compose.py27.pg94.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.4 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:2.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py27.pg95.yaml b/.buildkite/docker-compose.py27.pg95.yaml new file mode 100644 index 0000000000..c6a41f1da0 --- /dev/null +++ b/.buildkite/docker-compose.py27.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:2.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py35.pg94.yaml b/.buildkite/docker-compose.py35.pg94.yaml new file mode 100644 index 0000000000..978aedd115 --- /dev/null +++ b/.buildkite/docker-compose.py35.pg94.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.4 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.5 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py35.pg95.yaml b/.buildkite/docker-compose.py35.pg95.yaml new file mode 100644 index 0000000000..2f14387fbc --- /dev/null +++ b/.buildkite/docker-compose.py35.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.5 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py37.pg11.yaml b/.buildkite/docker-compose.py37.pg11.yaml new file mode 100644 index 0000000000..f3eec05ceb --- /dev/null +++ b/.buildkite/docker-compose.py37.pg11.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:11 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py37.pg95.yaml b/.buildkite/docker-compose.py37.pg95.yaml new file mode 100644 index 0000000000..2a41db8eba --- /dev/null +++ b/.buildkite/docker-compose.py37.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 0000000000..369a1ffed1 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,157 @@ +env: + CODECOV_TOKEN: "2dd7eb9b-0eda-45fe-a47c-9b5ac040045f" + +steps: + - command: + - "python -m pip install tox" + - "tox -e pep8" + label: "\U0001F9F9 PEP-8" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "tox -e packaging" + label: "\U0001F9F9 packaging" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "tox -e check_isort" + label: "\U0001F9F9 isort" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "scripts-dev/check-newsfragment" + label: ":newspaper: Newsfile" + branches: "!master !develop !release-*" + plugins: + - docker#v3.0.1: + image: "python:3.6" + propagate-environment: true + + - wait + + - command: + - "python -m pip install tox" + - "tox -e check-sampleconfig" + label: "\U0001F9F9 check-sample-config" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "tox -e py27,codecov" + label: ":python: 2.7 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:2.7" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py35,codecov" + label: ":python: 3.5 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.5" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py36,codecov" + label: ":python: 3.6 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.6" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py37,codecov" + label: ":python: 3.7 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.7" + propagate-environment: true + + - label: ":python: 2.7 / :postgres: 9.4" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py27-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py27.pg94.yaml + + - label: ":python: 2.7 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py27-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py27.pg95.yaml + + - label: ":python: 3.5 / :postgres: 9.4" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py35.pg94.yaml + + - label: ":python: 3.5 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py35.pg95.yaml + + - label: ":python: 3.7 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py37.pg95.yaml + + - label: ":python: 3.7 / :postgres: 11" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py37.pg11.yaml diff --git a/.gitignore b/.gitignore index a20f3e615d..a84c41b0c9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,11 +12,15 @@ _trial_temp/ _trial_temp*/ # stuff that is likely to exist when you run a server locally +/*.db +/*.log +/*.log.config +/*.pid /*.signing.key -/*.tls.crt -/*.tls.key -/uploads +/env/ +/homeserver*.yaml /media_store/ +/uploads # IDEs /.idea/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0d0fa7082a..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,97 +0,0 @@ -dist: xenial -language: python - -cache: - directories: - # we only bother to cache the wheels; parts of the http cache get - # invalidated every build (because they get served with a max-age of 600 - # seconds), which means that we end up re-uploading the whole cache for - # every build, which is time-consuming In any case, it's not obvious that - # downloading the cache from S3 would be much faster than downloading the - # originals from pypi. - # - - $HOME/.cache/pip/wheels - -# don't clone the whole repo history, one commit will do -git: - depth: 1 - -# only build branches we care about (PRs are built seperately) -branches: - only: - - master - - develop - - /^release-v/ - - rav/pg95 - -# When running the tox environments that call Twisted Trial, we can pass the -j -# flag to run the tests concurrently. We set this to 2 for CPU bound tests -# (SQLite) and 4 for I/O bound tests (PostgreSQL). -matrix: - fast_finish: true - include: - - name: "pep8" - python: 3.6 - env: TOX_ENV="pep8,check_isort,packaging" - - - name: "py2.7 / sqlite" - python: 2.7 - env: TOX_ENV=py27,codecov TRIAL_FLAGS="-j 2" - - - name: "py2.7 / sqlite / olddeps" - python: 2.7 - env: TOX_ENV=py27-old TRIAL_FLAGS="-j 2" - - - name: "py2.7 / postgres9.5" - python: 2.7 - addons: - postgresql: "9.5" - env: TOX_ENV=py27-postgres,codecov TRIAL_FLAGS="-j 4" - services: - - postgresql - - - name: "py3.5 / sqlite" - python: 3.5 - env: TOX_ENV=py35,codecov TRIAL_FLAGS="-j 2" - - - name: "py3.7 / sqlite" - python: 3.7 - env: TOX_ENV=py37,codecov TRIAL_FLAGS="-j 2" - - - name: "py3.7 / postgres9.4" - python: 3.7 - addons: - postgresql: "9.4" - env: TOX_ENV=py37-postgres TRIAL_FLAGS="-j 4" - services: - - postgresql - - - name: "py3.7 / postgres9.5" - python: 3.7 - addons: - postgresql: "9.5" - env: TOX_ENV=py37-postgres,codecov TRIAL_FLAGS="-j 4" - services: - - postgresql - - - # we only need to check for the newsfragment if it's a PR build - if: type = pull_request - name: "check-newsfragment" - python: 3.6 - script: scripts-dev/check-newsfragment - -install: - # this just logs the postgres version we will be testing against (if any) - - psql -At -U postgres -c 'select version();' || true - - - pip install tox - - # if we don't have python3.6 in this environment, travis unhelpfully gives us - # a `python3.6` on our path which does nothing but spit out a warning. Tox - # tries to run it (even if we're not running a py36 env), so the build logs - # then have warnings which look like errors. To reduce the noise, remove the - # non-functional python3.6. - - ( ! command -v python3.6 || python3.6 --version ) &>/dev/null || rm -f $(command -v python3.6) - -script: - - tox -e $TOX_ENV diff --git a/AUTHORS.rst b/AUTHORS.rst index d599aec74c..3ea18eefcb 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -69,3 +69,6 @@ Serban Constantin <serban.constantin at gmail dot com> Jason Robinson <jasonr at matrix.org> * Minor fixes + +Joseph Weston <joseph at weston.cloud> + + Add admin API for querying HS version diff --git a/INSTALL.md b/INSTALL.md index 6105cd6db8..de6893530d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -375,9 +375,13 @@ To configure Synapse to expose an HTTPS port, you will need to edit * You will also need to uncomment the `tls_certificate_path` and `tls_private_key_path` lines under the `TLS` section. You can either point these settings at an existing certificate and key, or you can - enable Synapse's built-in ACME (Let's Encrypt) support. Instructions + enable Synapse's built-in ACME (Let's Encrypt) support. Instructions for having Synapse automatically provision and renew federation - certificates through ACME can be found at [ACME.md](docs/ACME.md). + certificates through ACME can be found at [ACME.md](docs/ACME.md). If you + are using your own certificate, be sure to use a `.pem` file that includes + the full certificate chain including any intermediate certificates (for + instance, if using certbot, use `fullchain.pem` as your certificate, not + `cert.pem`). For those of you upgrading your TLS certificate in readiness for Synapse 1.0, please take a look at `our guide <docs/MSC1711_certificates_FAQ.md#configuring-certificates-for-compatibility-with-synapse-100>`_. @@ -406,8 +410,8 @@ This process uses a setting `registration_shared_secret` in `homeserver.yaml`, which is shared between Synapse itself and the `register_new_matrix_user` script. It doesn't matter what it is (a random value is generated by `--generate-config`), but it should be kept secret, as -anyone with knowledge of it can register users on your server even if -`enable_registration` is `false`. +anyone with knowledge of it can register users, including admin accounts, +on your server even if `enable_registration` is `false`. ## Setting up a TURN server diff --git a/MANIFEST.in b/MANIFEST.in index eb2de60f72..0500dd6b87 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -39,6 +39,7 @@ prune .circleci prune .coveragerc prune debian prune .codecov.yml +prune .buildkite exclude jenkins* recursive-exclude jenkins *.sh diff --git a/README.rst b/README.rst index 7cb2c82b79..24afb93d7d 100644 --- a/README.rst +++ b/README.rst @@ -120,9 +120,9 @@ recommended to also set up CAPTCHA - see `<docs/CAPTCHA_SETUP.rst>`_.) Once ``enable_registration`` is set to ``true``, it is possible to register a user via `riot.im <https://riot.im/app/#/register>`_ or other Matrix clients. -Your new user name will be formed partly from the ``server_name`` (see -`Configuring synapse`_), and partly from a localpart you specify when you -create the account. Your name will take the form of:: +Your new user name will be formed partly from the ``server_name``, and partly +from a localpart you specify when you create the account. Your name will take +the form of:: @localpart:my.domain.name diff --git a/changelog.d/2090.bugfix b/changelog.d/2090.bugfix new file mode 100644 index 0000000000..de2d22fcb8 --- /dev/null +++ b/changelog.d/2090.bugfix @@ -0,0 +1 @@ +Fix a bug where media with spaces in the name would get a corrupted name. diff --git a/changelog.d/4537.feature b/changelog.d/4537.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4537.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. diff --git a/changelog.d/4662.misc b/changelog.d/4662.misc new file mode 100644 index 0000000000..f4ec0d6a68 --- /dev/null +++ b/changelog.d/4662.misc @@ -0,0 +1 @@ +Add a systemd setup that supports synapse workers. Contributed by Luca Corbatto. diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix new file mode 100644 index 0000000000..1d7f3174e7 --- /dev/null +++ b/changelog.d/4699.bugfix @@ -0,0 +1 @@ +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/changelog.d/4735.feature b/changelog.d/4735.feature new file mode 100644 index 0000000000..a4c0b196f6 --- /dev/null +++ b/changelog.d/4735.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /register endpoint. diff --git a/changelog.d/4740.bugfix b/changelog.d/4740.bugfix new file mode 100644 index 0000000000..f82bb4227a --- /dev/null +++ b/changelog.d/4740.bugfix @@ -0,0 +1 @@ +'event_id' is now a required parameter in federated state requests, as per the matrix spec. diff --git a/changelog.d/4749.bugfix b/changelog.d/4749.bugfix new file mode 100644 index 0000000000..174e6b4e5e --- /dev/null +++ b/changelog.d/4749.bugfix @@ -0,0 +1 @@ +Fix tightloop over connecting to replication server. diff --git a/changelog.d/4752.misc b/changelog.d/4752.misc new file mode 100644 index 0000000000..fb1e76edce --- /dev/null +++ b/changelog.d/4752.misc @@ -0,0 +1 @@ +Change from TravisCI to Buildkite for CI. diff --git a/changelog.d/4757.feature b/changelog.d/4757.feature new file mode 100644 index 0000000000..b89029f2b4 --- /dev/null +++ b/changelog.d/4757.feature @@ -0,0 +1 @@ +Move server key queries to federation reader. diff --git a/changelog.d/4757.misc b/changelog.d/4757.misc new file mode 100644 index 0000000000..42bb66f7aa --- /dev/null +++ b/changelog.d/4757.misc @@ -0,0 +1 @@ +When presence is disabled don't send over replication. diff --git a/changelog.d/4759.feature b/changelog.d/4759.feature new file mode 100644 index 0000000000..643ee404dc --- /dev/null +++ b/changelog.d/4759.feature @@ -0,0 +1 @@ +Add support for /account/3pid REST endpoint to client_reader worker. diff --git a/changelog.d/4763.bugfix b/changelog.d/4763.bugfix new file mode 100644 index 0000000000..213ea44b70 --- /dev/null +++ b/changelog.d/4763.bugfix @@ -0,0 +1 @@ +Fix parsing of Content-Disposition headers on remote media requests and URL previews. diff --git a/changelog.d/4765.misc b/changelog.d/4765.misc new file mode 100644 index 0000000000..c273fd0cc4 --- /dev/null +++ b/changelog.d/4765.misc @@ -0,0 +1 @@ +Minor docstring fixes for MatrixFederationAgent. \ No newline at end of file diff --git a/changelog.d/4770.misc b/changelog.d/4770.misc new file mode 100644 index 0000000000..144d819958 --- /dev/null +++ b/changelog.d/4770.misc @@ -0,0 +1 @@ +Optimise EDU transmission for the federation_sender worker. diff --git a/changelog.d/4771.misc b/changelog.d/4771.misc new file mode 100644 index 0000000000..8fa3401ca4 --- /dev/null +++ b/changelog.d/4771.misc @@ -0,0 +1 @@ +Update test_typing to use HomeserverTestCase. diff --git a/changelog.d/4772.feature b/changelog.d/4772.feature new file mode 100644 index 0000000000..19bb91f1e8 --- /dev/null +++ b/changelog.d/4772.feature @@ -0,0 +1 @@ +Add an endpoint to the admin API for querying the server version. Contributed by Joseph Weston. diff --git a/changelog.d/4776.bugfix b/changelog.d/4776.bugfix new file mode 100644 index 0000000000..ce3e6ce33c --- /dev/null +++ b/changelog.d/4776.bugfix @@ -0,0 +1 @@ +Fix incorrect log about not persisting duplicate state event. diff --git a/changelog.d/4779.misc b/changelog.d/4779.misc new file mode 100644 index 0000000000..2442bf31bd --- /dev/null +++ b/changelog.d/4779.misc @@ -0,0 +1 @@ +Update URLs for riot.im icons and logos in the default notification templates. diff --git a/changelog.d/4790.bugfix b/changelog.d/4790.bugfix new file mode 100644 index 0000000000..aa8eb93246 --- /dev/null +++ b/changelog.d/4790.bugfix @@ -0,0 +1 @@ +Fix v4v6 option in HAProxy example config. Contributed by Flakebi. diff --git a/changelog.d/4791.feature b/changelog.d/4791.feature new file mode 100644 index 0000000000..1e5fd32463 --- /dev/null +++ b/changelog.d/4791.feature @@ -0,0 +1 @@ +Include a default configuration file in the 'docs' directory. diff --git a/changelog.d/4792.bugfix b/changelog.d/4792.bugfix new file mode 100644 index 0000000000..b127b6254f --- /dev/null +++ b/changelog.d/4792.bugfix @@ -0,0 +1 @@ +Handle batch updates in worker replication protocol. \ No newline at end of file diff --git a/changelog.d/4794.misc b/changelog.d/4794.misc new file mode 100644 index 0000000000..99b543ecba --- /dev/null +++ b/changelog.d/4794.misc @@ -0,0 +1 @@ +Removed unnecessary $ from some federation endpoint path regexes. \ No newline at end of file diff --git a/changelog.d/4795.misc b/changelog.d/4795.misc new file mode 100644 index 0000000000..03995f42fe --- /dev/null +++ b/changelog.d/4795.misc @@ -0,0 +1 @@ +Remove link to deleted title in README. \ No newline at end of file diff --git a/changelog.d/4796.feature b/changelog.d/4796.feature new file mode 100644 index 0000000000..9e05560a3f --- /dev/null +++ b/changelog.d/4796.feature @@ -0,0 +1 @@ +Add support for /keys/query and /keys/changes REST endpoints to client_reader worker. diff --git a/changelog.d/4797.misc b/changelog.d/4797.misc new file mode 100644 index 0000000000..822e98e6a7 --- /dev/null +++ b/changelog.d/4797.misc @@ -0,0 +1 @@ +Clean up read-receipt handling. diff --git a/changelog.d/4798.misc b/changelog.d/4798.misc new file mode 100644 index 0000000000..d60f208dc3 --- /dev/null +++ b/changelog.d/4798.misc @@ -0,0 +1 @@ +Add some debug about processing read receipts. diff --git a/changelog.d/4799.misc b/changelog.d/4799.misc new file mode 100644 index 0000000000..5ab11a5c0b --- /dev/null +++ b/changelog.d/4799.misc @@ -0,0 +1 @@ +Clean up some replication code. diff --git a/changelog.d/4801.feature b/changelog.d/4801.feature new file mode 100644 index 0000000000..1e5fd32463 --- /dev/null +++ b/changelog.d/4801.feature @@ -0,0 +1 @@ +Include a default configuration file in the 'docs' directory. diff --git a/changelog.d/4804.feature b/changelog.d/4804.feature new file mode 100644 index 0000000000..a4c0b196f6 --- /dev/null +++ b/changelog.d/4804.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /register endpoint. diff --git a/changelog.d/4814.feature b/changelog.d/4814.feature new file mode 100644 index 0000000000..9433acd959 --- /dev/null +++ b/changelog.d/4814.feature @@ -0,0 +1 @@ +Add checks to incoming events over federation for events evading auth (aka "soft fail"). diff --git a/changelog.d/4815.misc b/changelog.d/4815.misc new file mode 100644 index 0000000000..b123b36f7f --- /dev/null +++ b/changelog.d/4815.misc @@ -0,0 +1 @@ +Add some docstrings. diff --git a/changelog.d/4816.misc b/changelog.d/4816.misc new file mode 100644 index 0000000000..43d94251f7 --- /dev/null +++ b/changelog.d/4816.misc @@ -0,0 +1 @@ +Add debug logger to try and track down #4422. diff --git a/changelog.d/4817.misc b/changelog.d/4817.misc new file mode 100644 index 0000000000..438a51dc63 --- /dev/null +++ b/changelog.d/4817.misc @@ -0,0 +1 @@ +Make shutdown API send explanation message to room after users have been forced joined. diff --git a/changelog.d/4818.bugfix b/changelog.d/4818.bugfix new file mode 100644 index 0000000000..ebbc27a433 --- /dev/null +++ b/changelog.d/4818.bugfix @@ -0,0 +1 @@ +Fix bug where we didn't correctly throttle sending of USER_IP commands over replication. diff --git a/changelog.d/4820.misc b/changelog.d/4820.misc new file mode 100644 index 0000000000..1e35b5b63c --- /dev/null +++ b/changelog.d/4820.misc @@ -0,0 +1 @@ +Update example_log_config.yaml. diff --git a/changelog.d/4821.feature b/changelog.d/4821.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4821.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/changelog.d/4824.misc b/changelog.d/4824.misc new file mode 100644 index 0000000000..a4c5a1df37 --- /dev/null +++ b/changelog.d/4824.misc @@ -0,0 +1 @@ +Document the `generate` option for the docker image. diff --git a/changelog.d/4825.misc b/changelog.d/4825.misc new file mode 100644 index 0000000000..166661ab6a --- /dev/null +++ b/changelog.d/4825.misc @@ -0,0 +1 @@ +Fix check-newsfragment for debian-only changes. diff --git a/changelog.d/4828.misc b/changelog.d/4828.misc new file mode 100644 index 0000000000..2fe554884a --- /dev/null +++ b/changelog.d/4828.misc @@ -0,0 +1 @@ +Add some debug logging for device list updates to help with #4828. diff --git a/changelog.d/4829.bugfix b/changelog.d/4829.bugfix new file mode 100644 index 0000000000..b05235e215 --- /dev/null +++ b/changelog.d/4829.bugfix @@ -0,0 +1 @@ +Fix potential race in handling missing updates in device list updates. diff --git a/changelog.d/4837.bugfix b/changelog.d/4837.bugfix new file mode 100644 index 0000000000..989aeb82bb --- /dev/null +++ b/changelog.d/4837.bugfix @@ -0,0 +1 @@ +Fix bug where synapse expected an un-specced `prev_state` field on state events. diff --git a/changelog.d/4838.bugfix b/changelog.d/4838.bugfix new file mode 100644 index 0000000000..7f4fceabff --- /dev/null +++ b/changelog.d/4838.bugfix @@ -0,0 +1 @@ +Transfer a user's notification settings (push rules) on room upgrade. \ No newline at end of file diff --git a/changelog.d/4839.misc b/changelog.d/4839.misc new file mode 100644 index 0000000000..7c6868051b --- /dev/null +++ b/changelog.d/4839.misc @@ -0,0 +1 @@ +Disable captcha registration by default in unit tests. \ No newline at end of file diff --git a/changelog.d/4843.misc b/changelog.d/4843.misc new file mode 100644 index 0000000000..03d0a3e2e7 --- /dev/null +++ b/changelog.d/4843.misc @@ -0,0 +1 @@ +Add stuff back to the .gitignore. diff --git a/changelog.d/4844.misc b/changelog.d/4844.misc new file mode 100644 index 0000000000..eff6f1c43c --- /dev/null +++ b/changelog.d/4844.misc @@ -0,0 +1 @@ +Clarify what registration_shared_secret allows for. diff --git a/changelog.d/4846.feature b/changelog.d/4846.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4846.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. diff --git a/changelog.d/4847.misc b/changelog.d/4847.misc new file mode 100644 index 0000000000..a001238e08 --- /dev/null +++ b/changelog.d/4847.misc @@ -0,0 +1 @@ +Correctly log expected errors when fetching server keys. diff --git a/changelog.d/4849.misc b/changelog.d/4849.misc new file mode 100644 index 0000000000..f2cab20b44 --- /dev/null +++ b/changelog.d/4849.misc @@ -0,0 +1 @@ +Update install docs to explicitly state a full-chain (not just the top-level) TLS certificate must be provided to Synapse. This caused some people's Synapse ports to appear correct in a browser but still (rightfully so) upset the federation tester. \ No newline at end of file diff --git a/changelog.d/4852.misc b/changelog.d/4852.misc new file mode 100644 index 0000000000..76ab1e34e7 --- /dev/null +++ b/changelog.d/4852.misc @@ -0,0 +1 @@ + Move client read-receipt processing to federation sender worker. \ No newline at end of file diff --git a/changelog.d/4853.feature b/changelog.d/4853.feature new file mode 100644 index 0000000000..360f92e1de --- /dev/null +++ b/changelog.d/4853.feature @@ -0,0 +1 @@ +Allow passing --daemonize flags to workers in the same way as with master. diff --git a/changelog.d/4855.misc b/changelog.d/4855.misc new file mode 100644 index 0000000000..c4906d2f56 --- /dev/null +++ b/changelog.d/4855.misc @@ -0,0 +1 @@ +Refactor federation TransactionQueue. \ No newline at end of file diff --git a/changelog.d/4863.misc b/changelog.d/4863.misc new file mode 100644 index 0000000000..bfe03cbedc --- /dev/null +++ b/changelog.d/4863.misc @@ -0,0 +1 @@ +Comment out most options in the generated config. diff --git a/changelog.d/4864.feature b/changelog.d/4864.feature new file mode 100644 index 0000000000..57927f2620 --- /dev/null +++ b/changelog.d/4864.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. \ No newline at end of file diff --git a/changelog.d/4865.feature b/changelog.d/4865.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4865.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/contrib/example_log_config.yaml b/contrib/example_log_config.yaml index c7aa68abf2..06592963da 100644 --- a/contrib/example_log_config.yaml +++ b/contrib/example_log_config.yaml @@ -19,6 +19,7 @@ handlers: # example output to console console: class: logging.StreamHandler + formatter: fmt filters: [context] # example output to file - to enable, edit 'root' config below. @@ -29,7 +30,7 @@ handlers: maxBytes: 100000000 backupCount: 3 filters: [context] - + encoding: utf8 root: level: INFO diff --git a/contrib/systemd-with-workers/README.md b/contrib/systemd-with-workers/README.md new file mode 100644 index 0000000000..74b261e9fb --- /dev/null +++ b/contrib/systemd-with-workers/README.md @@ -0,0 +1,150 @@ +# Setup Synapse with Workers and Systemd + +This is a setup for managing synapse with systemd including support for +managing workers. It provides a `matrix-synapse`, as well as a +`matrix-synapse-worker@` service for any workers you require. Additionally to +group the required services it sets up a `matrix.target`. You can use this to +automatically start any bot- or bridge-services. More on this in +[Bots and Bridges](#bots-and-bridges). + +See the folder [system](system) for any service and target files. + +The folder [workers](workers) contains an example configuration for the +`federation_reader` worker. Pay special attention to the name of the +configuration file. In order to work with the `matrix-synapse-worker@.service` +service, it needs to have the exact same name as the worker app. + +This setup expects neither the homeserver nor any workers to fork. Forking is +handled by systemd. + +## Setup + +1. Adjust your matrix configs. Make sure that the worker config files have the +exact same name as the worker app. Compare `matrix-synapse-worker@.service` for +why. You can find an example worker config in the [workers](workers) folder. See +below for relevant settings in the `homeserver.yaml`. +2. Copy the `*.service` and `*.target` files in [system](system) to +`/etc/systemd/system`. +3. `systemctl enable matrix-synapse.service` this adds the homeserver +app to the `matrix.target` +4. *Optional.* `systemctl enable +matrix-synapse-worker@federation_reader.service` this adds the federation_reader +app to the `matrix-synapse.service` +5. *Optional.* Repeat step 4 for any additional workers you require. +6. *Optional.* Add any bots or bridges by enabling them. +7. Start all matrix related services via `systemctl start matrix.target` +8. *Optional.* Enable autostart of all matrix related services on system boot +via `systemctl enable matrix.target` + +## Usage + +After you have setup you can use the following commands to manage your synapse +installation: + +``` +# Start matrix-synapse, all workers and any enabled bots or bridges. +systemctl start matrix.target + +# Restart matrix-synapse and all workers (not necessarily restarting bots +# or bridges, see "Bots and Bridges") +systemctl restart matrix-synapse.service + +# Stop matrix-synapse and all workers (not necessarily restarting bots +# or bridges, see "Bots and Bridges") +systemctl stop matrix-synapse.service + +# Restart a specific worker (i. e. federation_reader), the homeserver is +# unaffected by this. +systemctl restart matrix-synapse-worker@federation_reader.service + +# Add a new worker (assuming all configs are setup already) +systemctl enable matrix-synapse-worker@federation_writer.service +systemctl restart matrix-synapse.service +``` + +## The Configs + +Make sure the `worker_app` is set in the `homeserver.yaml` and it does not fork. + +``` +worker_app: synapse.app.homeserver +daemonize: false +``` + +None of the workers should fork, as forking is handled by systemd. Hence make +sure this is present in all worker config files. + +``` +worker_daemonize: false +``` + +The config files of all workers are expected to be located in +`/etc/matrix-synapse/workers`. If you want to use a different location you have +to edit the provided `*.service` files accordingly. + +## Bots and Bridges + +Most bots and bridges do not care if the homeserver goes down or is restarted. +Depending on the implementation this may crash them though. So look up the docs +or ask the community of the specific bridge or bot you want to run to make sure +you choose the correct setup. + +Whichever configuration you choose, after the setup the following will enable +automatically starting (and potentially restarting) your bot/bridge with the +`matrix.target`. + +``` +systemctl enable <yourBotOrBridgeName>.service +``` + +**Note** that from an inactive synapse the bots/bridges will only be started with +synapse if you start the `matrix.target`, not if you start the +`matrix-synapse.service`. This is on purpose. Think of `matrix-synapse.service` +as *just* synapse, but `matrix.target` being anything matrix related, including +synapse and any and all enabled bots and bridges. + +### Start with synapse but ignore synapse going down + +If the bridge can handle shutdowns of the homeserver you'll want to install the +service in the `matrix.target` and optionally add a +`After=matrix-synapse.service` dependency to have the bot/bridge start after +synapse on starting everything. + +In this case the service file should look like this. + +``` +[Unit] +# ... +# Optional, this will only ensure that if you start everything, synapse will +# be started before the bot/bridge will be started. +After=matrix-synapse.service + +[Service] +# ... + +[Install] +WantedBy=matrix.target +``` + +### Stop/restart when synapse stops/restarts + +If the bridge can't handle shutdowns of the homeserver you'll still want to +install the service in the `matrix.target` but also have to specify the +`After=matrix-synapse.service` *and* `BindsTo=matrix-synapse.service` +dependencies to have the bot/bridge stop/restart with synapse. + +In this case the service file should look like this. + +``` +[Unit] +# ... +# Mandatory +After=matrix-synapse.service +BindsTo=matrix-synapse.service + +[Service] +# ... + +[Install] +WantedBy=matrix.target +``` diff --git a/contrib/systemd-with-workers/system/matrix-synapse-worker@.service b/contrib/systemd-with-workers/system/matrix-synapse-worker@.service new file mode 100644 index 0000000000..912984b9d2 --- /dev/null +++ b/contrib/systemd-with-workers/system/matrix-synapse-worker@.service @@ -0,0 +1,17 @@ +[Unit] +Description=Synapse Matrix Worker +After=matrix-synapse.service +BindsTo=matrix-synapse.service + +[Service] +Type=simple +User=matrix-synapse +WorkingDirectory=/var/lib/matrix-synapse +EnvironmentFile=/etc/default/matrix-synapse +ExecStart=/opt/venvs/matrix-synapse/bin/python -m synapse.app.%i --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --config-path=/etc/matrix-synapse/workers/%i.yaml +ExecReload=/bin/kill -HUP $MAINPID +Restart=always +RestartSec=3 + +[Install] +WantedBy=matrix-synapse.service diff --git a/contrib/systemd-with-workers/system/matrix-synapse.service b/contrib/systemd-with-workers/system/matrix-synapse.service new file mode 100644 index 0000000000..8bb4e400dc --- /dev/null +++ b/contrib/systemd-with-workers/system/matrix-synapse.service @@ -0,0 +1,16 @@ +[Unit] +Description=Synapse Matrix Homeserver + +[Service] +Type=simple +User=matrix-synapse +WorkingDirectory=/var/lib/matrix-synapse +EnvironmentFile=/etc/default/matrix-synapse +ExecStartPre=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --generate-keys +ExecStart=/opt/venvs/matrix-synapse/bin/python -m synapse.app.homeserver --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ +ExecReload=/bin/kill -HUP $MAINPID +Restart=always +RestartSec=3 + +[Install] +WantedBy=matrix.target diff --git a/contrib/systemd-with-workers/system/matrix.target b/contrib/systemd-with-workers/system/matrix.target new file mode 100644 index 0000000000..aff97d03ef --- /dev/null +++ b/contrib/systemd-with-workers/system/matrix.target @@ -0,0 +1,7 @@ +[Unit] +Description=Contains matrix services like synapse, bridges and bots +After=network.target +AllowIsolate=no + +[Install] +WantedBy=multi-user.target diff --git a/contrib/systemd-with-workers/workers/federation_reader.yaml b/contrib/systemd-with-workers/workers/federation_reader.yaml new file mode 100644 index 0000000000..47c54ec0d4 --- /dev/null +++ b/contrib/systemd-with-workers/workers/federation_reader.yaml @@ -0,0 +1,14 @@ +worker_app: synapse.app.federation_reader + +worker_replication_host: 127.0.0.1 +worker_replication_port: 9092 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: 8011 + resources: + - names: [federation] + +worker_daemonize: false +worker_log_config: /etc/matrix-synapse/federation-reader-log.yaml diff --git a/debian/changelog b/debian/changelog index fd77ce13a2..d84931ec03 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (0.99.3) UNRELEASED; urgency=medium + + * Fix warning during preconfiguration. (Fixes: #4819) + + -- Richard van der Hoff <richard@matrix.org> Thu, 07 Mar 2019 07:17:00 +0000 + matrix-synapse-py3 (0.99.2) stable; urgency=medium * Fix overwriting of config settings on upgrade. diff --git a/debian/matrix-synapse-py3.config b/debian/matrix-synapse-py3.config index 3bda3292f1..37a781b3e8 100755 --- a/debian/matrix-synapse-py3.config +++ b/debian/matrix-synapse-py3.config @@ -5,7 +5,11 @@ set -e . /usr/share/debconf/confmodule # try to update the debconf db according to whatever is in the config files -/opt/venvs/matrix-synapse/lib/manage_debconf.pl read || true +# +# note that we may get run during preconfiguration, in which case the script +# will not yet be installed. +[ -x /opt/venvs/matrix-synapse/lib/manage_debconf.pl ] && \ + /opt/venvs/matrix-synapse/lib/manage_debconf.pl read db_input high matrix-synapse/server-name || true db_input high matrix-synapse/report-stats || true diff --git a/docker/README.md b/docker/README.md index 3faedf629f..4b98b7fd75 100644 --- a/docker/README.md +++ b/docker/README.md @@ -28,7 +28,7 @@ with your postgres database. docker run \ -d \ --name synapse \ - -v ${DATA_PATH}:/data \ + --mount type=volume,src=synapse-data,dst=/data \ -e SYNAPSE_SERVER_NAME=my.matrix.host \ -e SYNAPSE_REPORT_STATS=yes \ matrixdotorg/synapse:latest @@ -87,10 +87,15 @@ Global settings: * ``SYNAPSE_CONFIG_PATH``, path to a custom config file If ``SYNAPSE_CONFIG_PATH`` is set, you should generate a configuration file -then customize it manually. No other environment variable is required. +then customize it manually: see [Generating a config +file](#generating-a-config-file). -Otherwise, a dynamic configuration file will be used. The following environment -variables are available for configuration: +Otherwise, a dynamic configuration file will be used. + +### Environment variables used to build a dynamic configuration file + +The following environment variables are used to build the configuration file +when ``SYNAPSE_CONFIG_PATH`` is not set. * ``SYNAPSE_SERVER_NAME`` (mandatory), the server public hostname. * ``SYNAPSE_REPORT_STATS``, (mandatory, ``yes`` or ``no``), enable anonymous @@ -143,3 +148,31 @@ Mail server specific values (will not send emails if not set): any. * ``SYNAPSE_SMTP_PASSWORD``, password for authenticating against the mail server if any. + +### Generating a config file + +It is possible to generate a basic configuration file for use with +`SYNAPSE_CONFIG_PATH` using the `generate` commandline option. You will need to +specify values for `SYNAPSE_CONFIG_PATH`, `SYNAPSE_SERVER_NAME` and +`SYNAPSE_REPORT_STATS`, and mount a docker volume to store the data on. For +example: + +``` +docker run -it --rm + --mount type=volume,src=synapse-data,dst=/data \ + -e SYNAPSE_CONFIG_PATH=/data/homeserver.yaml \ + -e SYNAPSE_SERVER_NAME=my.matrix.host \ + -e SYNAPSE_REPORT_STATS=yes \ + matrixdotorg/synapse:latest generate +``` + +This will generate a `homeserver.yaml` in (typically) +`/var/lib/docker/volumes/synapse-data/_data`, which you can then customise and +use with: + +``` +docker run -d --name synapse \ + --mount type=volume,src=synapse-data,dst=/data \ + -e SYNAPSE_CONFIG_PATH=/data/homeserver.yaml \ + matrixdotorg/synapse:latest +``` diff --git a/docs/.sample_config_header.yaml b/docs/.sample_config_header.yaml new file mode 100644 index 0000000000..e001ef5983 --- /dev/null +++ b/docs/.sample_config_header.yaml @@ -0,0 +1,12 @@ +# The config is maintained as an up-to-date snapshot of the default +# homeserver.yaml configuration generated by Synapse. +# +# It is intended to act as a reference for the default configuration, +# helping admins keep track of new options and other changes, and compare +# their configs with the current default. As such, many of the actual +# config values shown are placeholders. +# +# It is *not* intended to be copied and used as the basis for a real +# homeserver.yaml. Instead, if you are starting from scratch, please generate +# a fresh config using Synapse by following the instructions in INSTALL.md. + diff --git a/docs/admin_api/version_api.rst b/docs/admin_api/version_api.rst new file mode 100644 index 0000000000..30a91b5f43 --- /dev/null +++ b/docs/admin_api/version_api.rst @@ -0,0 +1,22 @@ +Version API +=========== + +This API returns the running Synapse version and the Python version +on which Synapse is being run. This is useful when a Synapse instance +is behind a proxy that does not forward the 'Server' header (which also +contains Synapse version information). + +The api is:: + + GET /_matrix/client/r0/admin/server_version + +including an ``access_token`` of a server admin. + +It returns a JSON body like the following: + +.. code:: json + + { + "server_version": "0.99.2rc1 (b=develop, abcdef123)", + "python_version": "3.6.8" + } diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst index cc6e66a8f3..8e26c50f1b 100644 --- a/docs/reverse_proxy.rst +++ b/docs/reverse_proxy.rst @@ -88,18 +88,16 @@ Let's assume that we expect clients to connect to our server at * HAProxy:: frontend https - bind 0.0.0.0:443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 - bind :::443 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 - + bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 + # Matrix client traffic acl matrix hdr(host) -i matrix.example.com use_backend matrix if matrix - + frontend matrix-federation - bind 0.0.0.0:8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 - bind :::8448 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 + bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 default_backend matrix - + backend matrix server matrix 127.0.0.1:8008 diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml new file mode 100644 index 0000000000..f9886a900d --- /dev/null +++ b/docs/sample_config.yaml @@ -0,0 +1,1094 @@ +# The config is maintained as an up-to-date snapshot of the default +# homeserver.yaml configuration generated by Synapse. +# +# It is intended to act as a reference for the default configuration, +# helping admins keep track of new options and other changes, and compare +# their configs with the current default. As such, many of the actual +# config values shown are placeholders. +# +# It is *not* intended to be copied and used as the basis for a real +# homeserver.yaml. Instead, if you are starting from scratch, please generate +# a fresh config using Synapse by following the instructions in INSTALL.md. + +## Server ## + +# The domain name of the server, with optional explicit port. +# This is used by remote servers to connect to this server, +# e.g. matrix.org, localhost:8080, etc. +# This is also the last part of your UserID. +# +server_name: "SERVERNAME" + +# When running as a daemon, the file to store the pid in +# +pid_file: DATADIR/homeserver.pid + +# CPU affinity mask. Setting this restricts the CPUs on which the +# process will be scheduled. It is represented as a bitmask, with the +# lowest order bit corresponding to the first logical CPU and the +# highest order bit corresponding to the last logical CPU. Not all CPUs +# may exist on a given system but a mask may specify more CPUs than are +# present. +# +# For example: +# 0x00000001 is processor #0, +# 0x00000003 is processors #0 and #1, +# 0xFFFFFFFF is all processors (#0 through #31). +# +# Pinning a Python process to a single CPU is desirable, because Python +# is inherently single-threaded due to the GIL, and can suffer a +# 30-40% slowdown due to cache blow-out and thread context switching +# if the scheduler happens to schedule the underlying threads across +# different cores. See +# https://www.mirantis.com/blog/improve-performance-python-programs-restricting-single-cpu/. +# +# This setting requires the affinity package to be installed! +# +#cpu_affinity: 0xFFFFFFFF + +# The path to the web client which will be served at /_matrix/client/ +# if 'webclient' is configured under the 'listeners' configuration. +# +#web_client_location: "/path/to/web/root" + +# The public-facing base URL that clients use to access this HS +# (not including _matrix/...). This is the same URL a user would +# enter into the 'custom HS URL' field on their client. If you +# use synapse with a reverse proxy, this should be the URL to reach +# synapse via the proxy. +# +#public_baseurl: https://example.com/ + +# Set the soft limit on the number of file descriptors synapse can use +# Zero is used to indicate synapse should set the soft limit to the +# hard limit. +# +#soft_file_limit: 0 + +# Set to false to disable presence tracking on this homeserver. +# +#use_presence: false + +# The GC threshold parameters to pass to `gc.set_threshold`, if defined +# +#gc_thresholds: [700, 10, 10] + +# Set the limit on the returned events in the timeline in the get +# and sync operations. The default value is -1, means no upper limit. +# +#filter_timeline_limit: 5000 + +# Whether room invites to users on this server should be blocked +# (except those sent by local server admins). The default is False. +# +#block_non_admin_invites: True + +# Room searching +# +# If disabled, new messages will not be indexed for searching and users +# will receive errors when searching for messages. Defaults to enabled. +# +#enable_search: false + +# Restrict federation to the following whitelist of domains. +# N.B. we recommend also firewalling your federation listener to limit +# inbound federation traffic as early as possible, rather than relying +# purely on this application-layer restriction. If not specified, the +# default is to whitelist everything. +# +#federation_domain_whitelist: +# - lon.example.com +# - nyc.example.com +# - syd.example.com + +# List of ports that Synapse should listen on, their purpose and their +# configuration. +# +# Options for each listener include: +# +# port: the TCP port to bind to +# +# bind_addresses: a list of local addresses to listen on. The default is +# 'all local interfaces'. +# +# type: the type of listener. Normally 'http', but other valid options are: +# 'manhole' (see docs/manhole.md), +# 'metrics' (see docs/metrics-howto.rst), +# 'replication' (see docs/workers.rst). +# +# tls: set to true to enable TLS for this listener. Will use the TLS +# key/cert specified in tls_private_key_path / tls_certificate_path. +# +# x_forwarded: Only valid for an 'http' listener. Set to true to use the +# X-Forwarded-For header as the client IP. Useful when Synapse is +# behind a reverse-proxy. +# +# resources: Only valid for an 'http' listener. A list of resources to host +# on this port. Options for each resource are: +# +# names: a list of names of HTTP resources. See below for a list of +# valid resource names. +# +# compress: set to true to enable HTTP comression for this resource. +# +# additional_resources: Only valid for an 'http' listener. A map of +# additional endpoints which should be loaded via dynamic modules. +# +# Valid resource names are: +# +# client: the client-server API (/_matrix/client). Also implies 'media' and +# 'static'. +# +# consent: user consent forms (/_matrix/consent). See +# docs/consent_tracking.md. +# +# federation: the server-server API (/_matrix/federation). Also implies +# 'media', 'keys', 'openid' +# +# keys: the key discovery API (/_matrix/keys). +# +# media: the media API (/_matrix/media). +# +# metrics: the metrics interface. See docs/metrics-howto.rst. +# +# openid: OpenID authentication. +# +# replication: the HTTP replication API (/_synapse/replication). See +# docs/workers.rst. +# +# static: static resources under synapse/static (/_matrix/static). (Mostly +# useful for 'fallback authentication'.) +# +# webclient: A web client. Requires web_client_location to be set. +# +listeners: + # TLS-enabled listener: for when matrix traffic is sent directly to synapse. + # + # Disabled by default. To enable it, uncomment the following. (Note that you + # will also need to give Synapse a TLS key and certificate: see the TLS section + # below.) + # + #- port: 8448 + # type: http + # tls: true + # resources: + # - names: [client, federation] + + # Unsecure HTTP listener: for when matrix traffic passes through a reverse proxy + # that unwraps TLS. + # + # If you plan to use a reverse proxy, please see + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # + - port: 8008 + tls: false + bind_addresses: ['::1', '127.0.0.1'] + type: http + x_forwarded: true + + resources: + - names: [client, federation] + compress: false + + # example additonal_resources: + # + #additional_resources: + # "/_matrix/my/custom/endpoint": + # module: my_module.CustomRequestHandler + # config: {} + + # Turn on the twisted ssh manhole service on localhost on the given + # port. + # + #- port: 9000 + # bind_addresses: ['::1', '127.0.0.1'] + # type: manhole + + +## Homeserver blocking ## + +# How to reach the server admin, used in ResourceLimitError +# +#admin_contact: 'mailto:admin@server.com' + +# Global blocking +# +#hs_disabled: False +#hs_disabled_message: 'Human readable reason for why the HS is blocked' +#hs_disabled_limit_type: 'error code(str), to help clients decode reason' + +# Monthly Active User Blocking +# +#limit_usage_by_mau: False +#max_mau_value: 50 +#mau_trial_days: 2 + +# If enabled, the metrics for the number of monthly active users will +# be populated, however no one will be limited. If limit_usage_by_mau +# is true, this is implied to be true. +# +#mau_stats_only: False + +# Sometimes the server admin will want to ensure certain accounts are +# never blocked by mau checking. These accounts are specified here. +# +#mau_limit_reserved_threepids: +# - medium: 'email' +# address: 'reserved_user@example.com' + + +## TLS ## + +# PEM-encoded X509 certificate for TLS. +# This certificate, as of Synapse 1.0, will need to be a valid and verifiable +# certificate, signed by a recognised Certificate Authority. +# +# See 'ACME support' below to enable auto-provisioning this certificate via +# Let's Encrypt. +# +# If supplying your own, be sure to use a `.pem` file that includes the +# full certificate chain including any intermediate certificates (for +# instance, if using certbot, use `fullchain.pem` as your certificate, +# not `cert.pem`). +# +#tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt" + +# PEM-encoded private key for TLS +# +#tls_private_key_path: "CONFDIR/SERVERNAME.tls.key" + +# ACME support: This will configure Synapse to request a valid TLS certificate +# for your configured `server_name` via Let's Encrypt. +# +# Note that provisioning a certificate in this way requires port 80 to be +# routed to Synapse so that it can complete the http-01 ACME challenge. +# By default, if you enable ACME support, Synapse will attempt to listen on +# port 80 for incoming http-01 challenges - however, this will likely fail +# with 'Permission denied' or a similar error. +# +# There are a couple of potential solutions to this: +# +# * If you already have an Apache, Nginx, or similar listening on port 80, +# you can configure Synapse to use an alternate port, and have your web +# server forward the requests. For example, assuming you set 'port: 8009' +# below, on Apache, you would write: +# +# ProxyPass /.well-known/acme-challenge http://localhost:8009/.well-known/acme-challenge +# +# * Alternatively, you can use something like `authbind` to give Synapse +# permission to listen on port 80. +# +acme: + # ACME support is disabled by default. Uncomment the following line + # (and tls_certificate_path and tls_private_key_path above) to enable it. + # + #enabled: true + + # Endpoint to use to request certificates. If you only want to test, + # use Let's Encrypt's staging url: + # https://acme-staging.api.letsencrypt.org/directory + # + #url: https://acme-v01.api.letsencrypt.org/directory + + # Port number to listen on for the HTTP-01 challenge. Change this if + # you are forwarding connections through Apache/Nginx/etc. + # + #port: 80 + + # Local addresses to listen on for incoming connections. + # Again, you may want to change this if you are forwarding connections + # through Apache/Nginx/etc. + # + #bind_addresses: ['::', '0.0.0.0'] + + # How many days remaining on a certificate before it is renewed. + # + #reprovision_threshold: 30 + + # The domain that the certificate should be for. Normally this + # should be the same as your Matrix domain (i.e., 'server_name'), but, + # by putting a file at 'https://<server_name>/.well-known/matrix/server', + # you can delegate incoming traffic to another server. If you do that, + # you should give the target of the delegation here. + # + # For example: if your 'server_name' is 'example.com', but + # 'https://example.com/.well-known/matrix/server' delegates to + # 'matrix.example.com', you should put 'matrix.example.com' here. + # + # If not set, defaults to your 'server_name'. + # + #domain: matrix.example.com + +# List of allowed TLS fingerprints for this server to publish along +# with the signing keys for this server. Other matrix servers that +# make HTTPS requests to this server will check that the TLS +# certificates returned by this server match one of the fingerprints. +# +# Synapse automatically adds the fingerprint of its own certificate +# to the list. So if federation traffic is handled directly by synapse +# then no modification to the list is required. +# +# If synapse is run behind a load balancer that handles the TLS then it +# will be necessary to add the fingerprints of the certificates used by +# the loadbalancers to this list if they are different to the one +# synapse is using. +# +# Homeservers are permitted to cache the list of TLS fingerprints +# returned in the key responses up to the "valid_until_ts" returned in +# key. It may be necessary to publish the fingerprints of a new +# certificate and wait until the "valid_until_ts" of the previous key +# responses have passed before deploying it. +# +# You can calculate a fingerprint from a given TLS listener via: +# openssl s_client -connect $host:$port < /dev/null 2> /dev/null | +# openssl x509 -outform DER | openssl sha256 -binary | base64 | tr -d '=' +# or by checking matrix.org/federationtester/api/report?server_name=$host +# +#tls_fingerprints: [{"sha256": "<base64_encoded_sha256_fingerprint>"}] + + + +## Database ## + +database: + # The database engine name + name: "sqlite3" + # Arguments to pass to the engine + args: + # Path to the database + database: "DATADIR/homeserver.db" + +# Number of events to cache in memory. +# +#event_cache_size: 10K + + +## Logging ## + +# A yaml python logging config file +# +log_config: "CONFDIR/SERVERNAME.log.config" + + +## Ratelimiting ## + +# Number of messages a client can send per second +# +#rc_messages_per_second: 0.2 + +# Number of message a client can send before being throttled +# +#rc_message_burst_count: 10.0 + +# Ratelimiting settings for registration and login. +# +# Each ratelimiting configuration is made of two parameters: +# - per_second: number of requests a client can send per second. +# - burst_count: number of requests a client can send before being throttled. +# +# Synapse currently uses the following configurations: +# - one for registration that ratelimits registration requests based on the +# client's IP address. +# - one for login that ratelimits login requests based on the client's IP +# address. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into, based on the amount of failed login +# attempts for this account. +# +# The defaults are as shown below. +# +#rc_registration: +# per_second: 0.17 +# burst_count: 3 +# +#rc_login: +# address: +# per_second: 0.17 +# burst_count: 3 +# account: +# per_second: 0.17 +# burst_count: 3 +# failed_attempts: +# per_second: 0.17 +# burst_count: 3 + +# The federation window size in milliseconds +# +#federation_rc_window_size: 1000 + +# The number of federation requests from a single server in a window +# before the server will delay processing the request. +# +#federation_rc_sleep_limit: 10 + +# The duration in milliseconds to delay processing events from +# remote servers by if they go over the sleep limit. +# +#federation_rc_sleep_delay: 500 + +# The maximum number of concurrent federation requests allowed +# from a single server +# +#federation_rc_reject_limit: 50 + +# The number of federation requests to concurrently process from a +# single server +# +#federation_rc_concurrent: 3 + + + +# Directory where uploaded images and attachments are stored. +# +media_store_path: "DATADIR/media_store" + +# Media storage providers allow media to be stored in different +# locations. +# +#media_storage_providers: +# - module: file_system +# # Whether to write new local files. +# store_local: false +# # Whether to write new remote media +# store_remote: false +# # Whether to block upload requests waiting for write to this +# # provider to complete +# store_synchronous: false +# config: +# directory: /mnt/some/other/directory + +# Directory where in-progress uploads are stored. +# +uploads_path: "DATADIR/uploads" + +# The largest allowed upload size in bytes +# +#max_upload_size: 10M + +# Maximum number of pixels that will be thumbnailed +# +#max_image_pixels: 32M + +# Whether to generate new thumbnails on the fly to precisely match +# the resolution requested by the client. If true then whenever +# a new resolution is requested by the client the server will +# generate a new thumbnail. If false the server will pick a thumbnail +# from a precalculated list. +# +#dynamic_thumbnails: false + +# List of thumbnails to precalculate when an image is uploaded. +# +#thumbnail_sizes: +# - width: 32 +# height: 32 +# method: crop +# - width: 96 +# height: 96 +# method: crop +# - width: 320 +# height: 240 +# method: scale +# - width: 640 +# height: 480 +# method: scale +# - width: 800 +# height: 600 +# method: scale + +# Is the preview URL API enabled? If enabled, you *must* specify +# an explicit url_preview_ip_range_blacklist of IPs that the spider is +# denied from accessing. +# +#url_preview_enabled: false + +# List of IP address CIDR ranges that the URL preview spider is denied +# from accessing. There are no defaults: you must explicitly +# specify a list for URL previewing to work. You should specify any +# internal services in your network that you do not want synapse to try +# to connect to, otherwise anyone in any Matrix room could cause your +# synapse to issue arbitrary GET requests to your internal services, +# causing serious security issues. +# +#url_preview_ip_range_blacklist: +# - '127.0.0.0/8' +# - '10.0.0.0/8' +# - '172.16.0.0/12' +# - '192.168.0.0/16' +# - '100.64.0.0/10' +# - '169.254.0.0/16' +# - '::1/128' +# - 'fe80::/64' +# - 'fc00::/7' +# +# List of IP address CIDR ranges that the URL preview spider is allowed +# to access even if they are specified in url_preview_ip_range_blacklist. +# This is useful for specifying exceptions to wide-ranging blacklisted +# target IP ranges - e.g. for enabling URL previews for a specific private +# website only visible in your network. +# +#url_preview_ip_range_whitelist: +# - '192.168.1.1' + +# Optional list of URL matches that the URL preview spider is +# denied from accessing. You should use url_preview_ip_range_blacklist +# in preference to this, otherwise someone could define a public DNS +# entry that points to a private IP address and circumvent the blacklist. +# This is more useful if you know there is an entire shape of URL that +# you know that will never want synapse to try to spider. +# +# Each list entry is a dictionary of url component attributes as returned +# by urlparse.urlsplit as applied to the absolute form of the URL. See +# https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit +# The values of the dictionary are treated as an filename match pattern +# applied to that component of URLs, unless they start with a ^ in which +# case they are treated as a regular expression match. If all the +# specified component matches for a given list item succeed, the URL is +# blacklisted. +# +#url_preview_url_blacklist: +# # blacklist any URL with a username in its URI +# - username: '*' +# +# # blacklist all *.google.com URLs +# - netloc: 'google.com' +# - netloc: '*.google.com' +# +# # blacklist all plain HTTP URLs +# - scheme: 'http' +# +# # blacklist http(s)://www.acme.com/foo +# - netloc: 'www.acme.com' +# path: '/foo' +# +# # blacklist any URL with a literal IPv4 address +# - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' + +# The largest allowed URL preview spidering size in bytes +# +#max_spider_size: 10M + + +## Captcha ## +# See docs/CAPTCHA_SETUP for full details of configuring this. + +# This Home Server's ReCAPTCHA public key. +# +#recaptcha_public_key: "YOUR_PUBLIC_KEY" + +# This Home Server's ReCAPTCHA private key. +# +#recaptcha_private_key: "YOUR_PRIVATE_KEY" + +# Enables ReCaptcha checks when registering, preventing signup +# unless a captcha is answered. Requires a valid ReCaptcha +# public/private key. +# +#enable_registration_captcha: false + +# A secret key used to bypass the captcha test entirely. +# +#captcha_bypass_secret: "YOUR_SECRET_HERE" + +# The API endpoint to use for verifying m.login.recaptcha responses. +# +#recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" + + +## TURN ## + +# The public URIs of the TURN server to give to clients +# +#turn_uris: [] + +# The shared secret used to compute passwords for the TURN server +# +#turn_shared_secret: "YOUR_SHARED_SECRET" + +# The Username and password if the TURN server needs them and +# does not use a token +# +#turn_username: "TURNSERVER_USERNAME" +#turn_password: "TURNSERVER_PASSWORD" + +# How long generated TURN credentials last +# +#turn_user_lifetime: 1h + +# Whether guests should be allowed to use the TURN server. +# This defaults to True, otherwise VoIP will be unreliable for guests. +# However, it does introduce a slight security risk as it allows users to +# connect to arbitrary endpoints without having first signed up for a +# valid account (e.g. by passing a CAPTCHA). +# +#turn_allow_guests: True + + +## Registration ## +# +# Registration can be rate-limited using the parameters in the "Ratelimiting" +# section of this file. + +# Enable registration for new users. +# +#enable_registration: false + +# The user must provide all of the below types of 3PID when registering. +# +#registrations_require_3pid: +# - email +# - msisdn + +# Explicitly disable asking for MSISDNs from the registration +# flow (overrides registrations_require_3pid if MSISDNs are set as required) +# +#disable_msisdn_registration: true + +# Mandate that users are only allowed to associate certain formats of +# 3PIDs with accounts on this server. +# +#allowed_local_3pids: +# - medium: email +# pattern: '.*@matrix\.org' +# - medium: email +# pattern: '.*@vector\.im' +# - medium: msisdn +# pattern: '\+44' + +# If set, allows registration of standard or admin accounts by anyone who +# has the shared secret, even if registration is otherwise disabled. +# +# registration_shared_secret: <PRIVATE STRING> + +# Set the number of bcrypt rounds used to generate password hash. +# Larger numbers increase the work factor needed to generate the hash. +# The default number is 12 (which equates to 2^12 rounds). +# N.B. that increasing this will exponentially increase the time required +# to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. +# +#bcrypt_rounds: 12 + +# Allows users to register as guests without a password/email/etc, and +# participate in rooms hosted on this server which have been made +# accessible to anonymous users. +# +#allow_guest_access: false + +# The identity server which we suggest that clients should use when users log +# in on this server. +# +# (By default, no suggestion is made, so it is left up to the client. +# This setting is ignored unless public_baseurl is also set.) +# +#default_identity_server: https://matrix.org + +# The list of identity servers trusted to verify third party +# identifiers by this server. +# +# Also defines the ID server which will be called when an account is +# deactivated (one will be picked arbitrarily). +# +#trusted_third_party_id_servers: +# - matrix.org +# - vector.im + +# Users who register on this homeserver will automatically be joined +# to these rooms +# +#auto_join_rooms: +# - "#example:example.com" + +# Where auto_join_rooms are specified, setting this flag ensures that the +# the rooms exist by creating them when the first user on the +# homeserver registers. +# Setting to false means that if the rooms are not manually created, +# users cannot be auto-joined since they do not exist. +# +#autocreate_auto_join_rooms: true + + +## Metrics ### + +# Enable collection and rendering of performance metrics +# +#enable_metrics: False + +# Enable sentry integration +# NOTE: While attempts are made to ensure that the logs don't contain +# any sensitive information, this cannot be guaranteed. By enabling +# this option the sentry server may therefore receive sensitive +# information, and it in turn may then diseminate sensitive information +# through insecure notification channels if so configured. +# +#sentry: +# dsn: "..." + +# Whether or not to report anonymized homeserver usage statistics. +# report_stats: true|false + + +## API Configuration ## + +# A list of event types that will be included in the room_invite_state +# +#room_invite_state_types: +# - "m.room.join_rules" +# - "m.room.canonical_alias" +# - "m.room.avatar" +# - "m.room.encryption" +# - "m.room.name" + + +# A list of application service config files to use +# +#app_service_config_files: +# - app_service_1.yaml +# - app_service_2.yaml + +# Uncomment to enable tracking of application service IP addresses. Implicitly +# enables MAU tracking for application service users. +# +#track_appservice_user_ips: True + + +# a secret which is used to sign access tokens. If none is specified, +# the registration_shared_secret is used, if one is given; otherwise, +# a secret key is derived from the signing key. +# +# macaroon_secret_key: <PRIVATE STRING> + +# Used to enable access token expiration. +# +#expire_access_token: False + +# a secret which is used to calculate HMACs for form values, to stop +# falsification of values. Must be specified for the User Consent +# forms to work. +# +# form_secret: <PRIVATE STRING> + +## Signing Keys ## + +# Path to the signing key to sign messages with +# +signing_key_path: "CONFDIR/SERVERNAME.signing.key" + +# The keys that the server used to sign messages with but won't use +# to sign new messages. E.g. it has lost its private key +# +#old_signing_keys: +# "ed25519:auto": +# # Base64 encoded public key +# key: "The public part of your old signing key." +# # Millisecond POSIX timestamp when the key expired. +# expired_ts: 123456789123 + +# How long key response published by this server is valid for. +# Used to set the valid_until_ts in /key/v2 APIs. +# Determines how quickly servers will query to check which keys +# are still valid. +# +#key_refresh_interval: 1d + +# The trusted servers to download signing keys from. +# +#perspectives: +# servers: +# "matrix.org": +# verify_keys: +# "ed25519:auto": +# key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" + + +# Enable SAML2 for registration and login. Uses pysaml2. +# +# `sp_config` is the configuration for the pysaml2 Service Provider. +# See pysaml2 docs for format of config. +# +# Default values will be used for the 'entityid' and 'service' settings, +# so it is not normally necessary to specify them unless you need to +# override them. +# +#saml2_config: +# sp_config: +# # point this to the IdP's metadata. You can use either a local file or +# # (preferably) a URL. +# metadata: +# #local: ["saml2/idp.xml"] +# remote: +# - url: https://our_idp/metadata.xml +# +# # The rest of sp_config is just used to generate our metadata xml, and you +# # may well not need it, depending on your setup. Alternatively you +# # may need a whole lot more detail - see the pysaml2 docs! +# +# description: ["My awesome SP", "en"] +# name: ["Test SP", "en"] +# +# organization: +# name: Example com +# display_name: +# - ["Example co", "en"] +# url: "http://example.com" +# +# contact_person: +# - given_name: Bob +# sur_name: "the Sysadmin" +# email_address": ["admin@example.com"] +# contact_type": technical +# +# # Instead of putting the config inline as above, you can specify a +# # separate pysaml2 configuration file: +# # +# config_path: "CONFDIR/sp_conf.py" + + + +# Enable CAS for registration and login. +# +#cas_config: +# enabled: true +# server_url: "https://cas-server.com" +# service_url: "https://homeserver.domain.com:8448" +# #required_attributes: +# # name: value + + +# The JWT needs to contain a globally unique "sub" (subject) claim. +# +#jwt_config: +# enabled: true +# secret: "a secret" +# algorithm: "HS256" + + +password_config: + # Uncomment to disable password login + # + #enabled: false + + # Uncomment and change to a secret random string for extra security. + # DO NOT CHANGE THIS AFTER INITIAL SETUP! + # + #pepper: "EVEN_MORE_SECRET" + + + +# Enable sending emails for notification events +# Defining a custom URL for Riot is only needed if email notifications +# should contain links to a self-hosted installation of Riot; when set +# the "app_name" setting is ignored. +# +# If your SMTP server requires authentication, the optional smtp_user & +# smtp_pass variables should be used +# +#email: +# enable_notifs: false +# smtp_host: "localhost" +# smtp_port: 25 +# smtp_user: "exampleusername" +# smtp_pass: "examplepassword" +# require_transport_security: False +# notif_from: "Your Friendly %(app)s Home Server <noreply@example.com>" +# app_name: Matrix +# # if template_dir is unset, uses the example templates that are part of +# # the Synapse distribution. +# #template_dir: res/templates +# notif_template_html: notif_mail.html +# notif_template_text: notif_mail.txt +# notif_for_new_users: True +# riot_base_url: "http://localhost/riot" + + +#password_providers: +# - module: "ldap_auth_provider.LdapAuthProvider" +# config: +# enabled: true +# uri: "ldap://ldap.example.com:389" +# start_tls: true +# base: "ou=users,dc=example,dc=com" +# attributes: +# uid: "cn" +# mail: "email" +# name: "givenName" +# #bind_dn: +# #bind_password: +# #filter: "(objectClass=posixAccount)" + + + +# Clients requesting push notifications can either have the body of +# the message sent in the notification poke along with other details +# like the sender, or just the event ID and room ID (`event_id_only`). +# If clients choose the former, this option controls whether the +# notification request includes the content of the event (other details +# like the sender are still included). For `event_id_only` push, it +# has no effect. +# +# For modern android devices the notification content will still appear +# because it is loaded by the app. iPhone, however will send a +# notification saying only that a message arrived and who it came from. +# +#push: +# include_content: true + + +#spam_checker: +# module: "my_custom_project.SuperSpamChecker" +# config: +# example_option: 'things' + + +# Uncomment to allow non-server-admin users to create groups on this server +# +#enable_group_creation: true + +# If enabled, non server admins can only create groups with local parts +# starting with this prefix +# +#group_creation_prefix: "unofficial/" + + + +# User Directory configuration +# +# 'search_all_users' defines whether to search all users visible to your HS +# when searching the user directory, rather than limiting to users visible +# in public rooms. Defaults to false. If you set it True, you'll have to run +# UPDATE user_directory_stream_pos SET stream_id = NULL; +# on your database to tell it to rebuild the user_directory search indexes. +# +#user_directory: +# search_all_users: false + + +# User Consent configuration +# +# for detailed instructions, see +# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md +# +# Parts of this section are required if enabling the 'consent' resource under +# 'listeners', in particular 'template_dir' and 'version'. +# +# 'template_dir' gives the location of the templates for the HTML forms. +# This directory should contain one subdirectory per language (eg, 'en', 'fr'), +# and each language directory should contain the policy document (named as +# '<version>.html') and a success page (success.html). +# +# 'version' specifies the 'current' version of the policy document. It defines +# the version to be served by the consent resource if there is no 'v' +# parameter. +# +# 'server_notice_content', if enabled, will send a user a "Server Notice" +# asking them to consent to the privacy policy. The 'server_notices' section +# must also be configured for this to work. Notices will *not* be sent to +# guest users unless 'send_server_notice_to_guests' is set to true. +# +# 'block_events_error', if set, will block any attempts to send events +# until the user consents to the privacy policy. The value of the setting is +# used as the text of the error. +# +# 'require_at_registration', if enabled, will add a step to the registration +# process, similar to how captcha works. Users will be required to accept the +# policy before their account is created. +# +# 'policy_name' is the display name of the policy users will see when registering +# for an account. Has no effect unless `require_at_registration` is enabled. +# Defaults to "Privacy Policy". +# +#user_consent: +# template_dir: res/templates/privacy +# version: 1.0 +# server_notice_content: +# msgtype: m.text +# body: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# send_server_notice_to_guests: True +# block_events_error: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# require_at_registration: False +# policy_name: Privacy Policy +# + + +# Server Notices room configuration +# +# Uncomment this section to enable a room which can be used to send notices +# from the server to users. It is a special room which cannot be left; notices +# come from a special "notices" user id. +# +# If you uncomment this section, you *must* define the system_mxid_localpart +# setting, which defines the id of the user which will be used to send the +# notices. +# +# It's also possible to override the room name, the display name of the +# "notices" user, and the avatar for the user. +# +#server_notices: +# system_mxid_localpart: notices +# system_mxid_display_name: "Server Notices" +# system_mxid_avatar_url: "mxc://server.com/oumMVlgDnLYFaPVkExemNVVZ" +# room_name: "Server Notices" + + + +# The `alias_creation` option controls who's allowed to create aliases +# on this server. +# +# The format of this option is a list of rules that contain globs that +# match against user_id, room_id and the new alias (fully qualified with +# server name). The action in the first rule that matches is taken, +# which can currently either be "allow" or "deny". +# +# Missing user_id/room_id/alias fields default to "*". +# +# If no rules match the request is denied. An empty list means no one +# can create aliases. +# +# Options for the rules include: +# +# user_id: Matches against the creator of the alias +# alias: Matches against the alias being created +# room_id: Matches against the room ID the alias is being pointed at +# action: Whether to "allow" or "deny" the request if the rule matches +# +# The default is: +# +#alias_creation_rules: +# - user_id: "*" +# alias: "*" +# room_id: "*" +# action: allow + +# The `room_list_publication_rules` option controls who can publish and +# which rooms can be published in the public room list. +# +# The format of this option is the same as that for +# `alias_creation_rules`. +# +# If the room has one or more aliases associated with it, only one of +# the aliases needs to match the alias rule. If there are no aliases +# then only rules with `alias: *` match. +# +# If no rules match the request is denied. An empty list means no one +# can publish rooms. +# +# Options for the rules include: +# +# user_id: Matches agaisnt the creator of the alias +# room_id: Matches against the room ID being published +# alias: Matches against any current local or canonical aliases +# associated with the room +# action: Whether to "allow" or "deny" the request if the rule matches +# +# The default is: +# +#room_list_publication_rules: +# - user_id: "*" +# alias: "*" +# room_id: "*" +# action: allow diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst index 73436cea62..75e723484c 100644 --- a/docs/tcp_replication.rst +++ b/docs/tcp_replication.rst @@ -188,7 +188,9 @@ RDATA (S) A single update in a stream POSITION (S) - The position of the stream has been updated + The position of the stream has been updated. Sent to the client after all + missing updates for a stream have been sent to the client and they're now + up to date. ERROR (S, C) There was an error diff --git a/docs/workers.rst b/docs/workers.rst index 3ba5879f76..d80fc04d2e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -182,6 +182,7 @@ endpoints matching the following regular expressions:: ^/_matrix/federation/v1/event_auth/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/send/ + ^/_matrix/key/v2/query The above endpoints should all be routed to the federation_reader worker by the reverse-proxy configuration. @@ -223,6 +224,9 @@ following regular expressions:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ ^/_matrix/client/(api/v1|r0|unstable)/login$ + ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ Additionally, the following REST endpoints can be handled, but all requests must be routed to the same instance:: diff --git a/scripts-dev/check-newsfragment b/scripts-dev/check-newsfragment index e4a22bae61..e0ac84198e 100755 --- a/scripts-dev/check-newsfragment +++ b/scripts-dev/check-newsfragment @@ -7,14 +7,12 @@ set -e # make sure that origin/develop is up to date git remote set-branches --add origin develop -git fetch --depth=1 origin develop - -UPSTREAM=origin/develop +git fetch origin develop # if there are changes in the debian directory, check that the debian changelog # has been updated -if ! git diff --quiet $UPSTREAM... -- debian; then - if git diff --quiet $UPSTREAM... -- debian/changelog; then +if ! git diff --quiet FETCH_HEAD... -- debian; then + if git diff --quiet FETCH_HEAD... -- debian/changelog; then echo "Updates to debian directory, but no update to the changelog." >&2 exit 1 fi @@ -22,7 +20,7 @@ fi # if there are changes *outside* the debian directory, check that the # newsfragments have been updated. -if git diff --name-only $UPSTREAM... | grep -qv '^develop/'; then +if git diff --name-only FETCH_HEAD... | grep -qv '^debian/'; then tox -e check-newsfragment fi @@ -31,7 +29,7 @@ echo "--------------------------" echo # check that any new newsfiles on this branch end with a full stop. -for f in `git diff --name-only $UPSTREAM... -- changelog.d`; do +for f in `git diff --name-only FETCH_HEAD... -- changelog.d`; do lastchar=`tr -d '\n' < $f | tail -c 1` if [ $lastchar != '.' ]; then echo -e "\e[31mERROR: newsfragment $f does not end with a '.'\e[39m" >&2 diff --git a/scripts-dev/generate_sample_config b/scripts-dev/generate_sample_config new file mode 100755 index 0000000000..5e33b9b549 --- /dev/null +++ b/scripts-dev/generate_sample_config @@ -0,0 +1,18 @@ +#!/bin/bash +# +# Update/check the docs/sample_config.yaml + +set -e + +cd `dirname $0`/.. + +SAMPLE_CONFIG="docs/sample_config.yaml" + +if [ "$1" == "--check" ]; then + diff -u "$SAMPLE_CONFIG" <(./scripts/generate_config --header-file docs/.sample_config_header.yaml) >/dev/null || { + echo -e "\e[1m\e[31m$SAMPLE_CONFIG is not up-to-date. Regenerate it with \`scripts-dev/generate_sample_config\`.\e[0m" >&2 + exit 1 + } +else + ./scripts/generate_config --header-file docs/.sample_config_header.yaml -o "$SAMPLE_CONFIG" +fi diff --git a/scripts/generate_config b/scripts/generate_config index 61c5f049e8..93b6406992 100755 --- a/scripts/generate_config +++ b/scripts/generate_config @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +import shutil import sys from synapse.config.homeserver import HomeServerConfig @@ -50,6 +51,13 @@ if __name__ == "__main__": help="File to write the configuration to. Default: stdout", ) + parser.add_argument( + "--header-file", + type=argparse.FileType('r'), + help="File from which to read a header, which will be printed before the " + "generated config.", + ) + args = parser.parse_args() report_stats = args.report_stats @@ -64,4 +72,7 @@ if __name__ == "__main__": report_stats=report_stats, ) + if args.header_file: + shutil.copyfileobj(args.header_file, args.output_file) + args.output_file.write(conf) diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 3bb5b3da37..296c4a1c17 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -14,6 +14,8 @@ import collections +from synapse.api.errors import LimitExceededError + class Ratelimiter(object): """ @@ -23,12 +25,13 @@ class Ratelimiter(object): def __init__(self): self.message_counts = collections.OrderedDict() - def send_message(self, user_id, time_now_s, msg_rate_hz, burst_count, update=True): - """Can the user send a message? + def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True): + """Can the entity (e.g. user or IP address) perform the action? Args: - user_id: The user sending a message. + key: The key we should use when rate limiting. Can be a user ID + (when sending events), an IP address, etc. time_now_s: The time now. - msg_rate_hz: The long term number of messages a user can send in a + rate_hz: The long term number of messages a user can send in a second. burst_count: How many messages the user can send before being limited. @@ -41,10 +44,10 @@ class Ratelimiter(object): """ self.prune_message_counts(time_now_s) message_count, time_start, _ignored = self.message_counts.get( - user_id, (0., time_now_s, None), + key, (0., time_now_s, None), ) time_delta = time_now_s - time_start - sent_count = message_count - time_delta * msg_rate_hz + sent_count = message_count - time_delta * rate_hz if sent_count < 0: allowed = True time_start = time_now_s @@ -56,13 +59,13 @@ class Ratelimiter(object): message_count += 1 if update: - self.message_counts[user_id] = ( - message_count, time_start, msg_rate_hz + self.message_counts[key] = ( + message_count, time_start, rate_hz ) - if msg_rate_hz > 0: + if rate_hz > 0: time_allowed = ( - time_start + (message_count - burst_count + 1) / msg_rate_hz + time_start + (message_count - burst_count + 1) / rate_hz ) if time_allowed < time_now_s: time_allowed = time_now_s @@ -72,12 +75,22 @@ class Ratelimiter(object): return allowed, time_allowed def prune_message_counts(self, time_now_s): - for user_id in list(self.message_counts.keys()): - message_count, time_start, msg_rate_hz = ( - self.message_counts[user_id] + for key in list(self.message_counts.keys()): + message_count, time_start, rate_hz = ( + self.message_counts[key] ) time_delta = time_now_s - time_start - if message_count - time_delta * msg_rate_hz > 0: + if message_count - time_delta * rate_hz > 0: break else: - del self.message_counts[user_id] + del self.message_counts[key] + + def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True): + allowed, time_allowed = self.can_do_action( + key, time_now_s, rate_hz, burst_count, update + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)), + ) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 32e8b8a3f5..d4c6c4c8e2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -63,12 +63,13 @@ def start_worker_reactor(appname, config): start_reactor( appname, - config.soft_file_limit, - config.gc_thresholds, - config.worker_pid_file, - config.worker_daemonize, - config.worker_cpu_affinity, - logger, + soft_file_limit=config.soft_file_limit, + gc_thresholds=config.gc_thresholds, + pid_file=config.worker_pid_file, + daemonize=config.worker_daemonize, + cpu_affinity=config.worker_cpu_affinity, + print_pidfile=config.print_pidfile, + logger=logger, ) @@ -79,6 +80,7 @@ def start_reactor( pid_file, daemonize, cpu_affinity, + print_pidfile, logger, ): """ Run the reactor in the main process @@ -93,6 +95,7 @@ def start_reactor( pid_file (str): name of pid file to write to if daemonize is True daemonize (bool): true to run the reactor in a background process cpu_affinity (int|None): cpu affinity mask + print_pidfile (bool): whether to print the pid file, if daemonize is True logger (logging.Logger): logger instance to pass to Daemonize """ @@ -124,6 +127,9 @@ def start_reactor( reactor.run() if daemonize: + if print_pidfile: + print(pid_file) + daemon = Daemonize( app=appname, pid=pid_file, diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 043b48f8f3..beaea64a61 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -33,9 +33,13 @@ from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.keys import SlavedKeyStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore @@ -48,6 +52,8 @@ from synapse.rest.client.v1.room import ( RoomMemberListRestServlet, RoomStateRestServlet, ) +from synapse.rest.client.v2_alpha.account import ThreepidRestServlet +from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -60,6 +66,10 @@ logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( + SlavedDeviceInboxStore, + SlavedDeviceStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedAccountDataStore, SlavedEventStore, SlavedKeyStore, @@ -96,6 +106,9 @@ class ClientReaderServer(HomeServer): RoomEventContextServlet(self).register(resource) RegisterRestServlet(self).register(resource) LoginRestServlet(self).register(resource) + ThreepidRestServlet(self).register(resource) + KeyQueryServlet(self).register(resource) + KeyChangesServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index b116c17669..7da79dc827 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -21,7 +21,7 @@ from twisted.web.resource import NoResource import synapse from synapse import events -from synapse.api.urls import FEDERATION_PREFIX +from synapse.api.urls import FEDERATION_PREFIX, SERVER_KEY_V2_PREFIX from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig @@ -44,6 +44,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.key.v2 import KeyApiV2Resource from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -99,6 +100,9 @@ class FederationReaderServer(HomeServer): ), }) + if name in ["keys", "federation"]: + resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index a461442fdc..9711a7147c 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -28,6 +28,7 @@ from synapse.config.logger import setup_logging from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore @@ -37,8 +38,10 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.streams import ReceiptsStream from synapse.server import HomeServer from synapse.storage.engines import create_engine +from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background @@ -202,6 +205,7 @@ class FederationSenderHandler(object): """ def __init__(self, hs, replication_client): self.store = hs.get_datastore() + self._is_mine_id = hs.is_mine_id self.federation_sender = hs.get_federation_sender() self.replication_client = replication_client @@ -234,6 +238,32 @@ class FederationSenderHandler(object): elif stream_name == "events": self.federation_sender.notify_new_events(token) + # ... and when new receipts happen + elif stream_name == ReceiptsStream.NAME: + run_as_background_process( + "process_receipts_for_federation", self._on_new_receipts, rows, + ) + + @defer.inlineCallbacks + def _on_new_receipts(self, rows): + """ + Args: + rows (iterable[synapse.replication.tcp.streams.ReceiptsStreamRow]): + new receipts to be processed + """ + for receipt in rows: + # we only want to send on receipts for our own users + if not self._is_mine_id(receipt.user_id): + continue + receipt_info = ReadReceipt( + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + [receipt.event_id], + receipt.data, + ) + yield self.federation_sender.send_read_receipt(receipt_info) + @defer.inlineCallbacks def update_token(self, token): try: diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e8b6cc3114..869c028d1f 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -376,6 +376,7 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) hs.setup() + hs.setup_master() @defer.inlineCallbacks def do_acme(): @@ -636,17 +637,15 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, start_phone_stats_home) - if hs.config.daemonize and hs.config.print_pidfile: - print(hs.config.pid_file) - _base.start_reactor( "synapse-homeserver", - hs.config.soft_file_limit, - hs.config.gc_thresholds, - hs.config.pid_file, - hs.config.daemonize, - hs.config.cpu_affinity, - logger, + soft_file_limit=hs.config.soft_file_limit, + gc_thresholds=hs.config.gc_thresholds, + pid_file=hs.config.pid_file, + daemonize=hs.config.daemonize, + cpu_affinity=hs.config.cpu_affinity, + print_pidfile=hs.config.print_pidfile, + logger=logger, ) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 5aec43b702..5613f38e4d 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -180,9 +180,7 @@ class Config(object): Returns: str: the yaml config file """ - default_config = "# vim:ft=yaml\n" - - default_config += "\n\n".join( + default_config = "\n\n".join( dedent(conf) for conf in self.invoke_all( "default_config", @@ -216,14 +214,20 @@ class Config(object): " Defaults to the directory containing the last config file", ) + obj = cls() + + obj.invoke_all("add_arguments", config_parser) + config_args = config_parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) - obj = cls() obj.read_config_files( config_files, keys_directory=config_args.keys_directory, generate_keys=False ) + + obj.invoke_all("read_arguments", config_args) + return obj @classmethod @@ -297,19 +301,26 @@ class Config(object): "Must specify a server_name to a generate config for." " Pass -H server.name." ) + + config_str = obj.generate_config( + config_dir_path=config_dir_path, + data_dir_path=os.getcwd(), + server_name=server_name, + report_stats=(config_args.report_stats == "yes"), + generate_secrets=True, + ) + if not cls.path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: - config_str = obj.generate_config( - config_dir_path=config_dir_path, - data_dir_path=os.getcwd(), - server_name=server_name, - report_stats=(config_args.report_stats == "yes"), - generate_secrets=True, + config_file.write( + "# vim:ft=yaml\n\n" ) - config = yaml.load(config_str) - obj.invoke_all("generate_files", config) config_file.write(config_str) + + config = yaml.load(config_str) + obj.invoke_all("generate_files", config) + print( ( "A config file has been generated in %r for server name" diff --git a/synapse/config/api.py b/synapse/config/api.py index e8a753f002..5eb4f86fa2 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -34,10 +34,10 @@ class ApiConfig(Config): # A list of event types that will be included in the room_invite_state # - room_invite_state_types: - - "{JoinRules}" - - "{CanonicalAlias}" - - "{RoomAvatar}" - - "{RoomEncryption}" - - "{Name}" + #room_invite_state_types: + # - "{JoinRules}" + # - "{CanonicalAlias}" + # - "{RoomAvatar}" + # - "{RoomEncryption}" + # - "{Name}" """.format(**vars(EventTypes)) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index c260d59464..9e64c76544 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -37,14 +37,16 @@ class AppServiceConfig(Config): def default_config(cls, **kwargs): return """\ - # A list of application service config file to use + # A list of application service config files to use # - app_service_config_files: [] + #app_service_config_files: + # - app_service_1.yaml + # - app_service_2.yaml - # Whether or not to track application service IP addresses. Implicitly + # Uncomment to enable tracking of application service IP addresses. Implicitly # enables MAU tracking for application service users. # - track_appservice_user_ips: False + #track_appservice_user_ips: True """ diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index d25196be08..f7eebf26d2 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -18,11 +18,16 @@ from ._base import Config class CaptchaConfig(Config): def read_config(self, config): - self.recaptcha_private_key = config["recaptcha_private_key"] - self.recaptcha_public_key = config["recaptcha_public_key"] - self.enable_registration_captcha = config["enable_registration_captcha"] + self.recaptcha_private_key = config.get("recaptcha_private_key") + self.recaptcha_public_key = config.get("recaptcha_public_key") + self.enable_registration_captcha = config.get( + "enable_registration_captcha", False + ) self.captcha_bypass_secret = config.get("captcha_bypass_secret") - self.recaptcha_siteverify_api = config["recaptcha_siteverify_api"] + self.recaptcha_siteverify_api = config.get( + "recaptcha_siteverify_api", + "https://www.recaptcha.net/recaptcha/api/siteverify", + ) def default_config(self, **kwargs): return """\ @@ -31,21 +36,23 @@ class CaptchaConfig(Config): # This Home Server's ReCAPTCHA public key. # - recaptcha_public_key: "YOUR_PUBLIC_KEY" + #recaptcha_public_key: "YOUR_PUBLIC_KEY" # This Home Server's ReCAPTCHA private key. # - recaptcha_private_key: "YOUR_PRIVATE_KEY" + #recaptcha_private_key: "YOUR_PRIVATE_KEY" # Enables ReCaptcha checks when registering, preventing signup # unless a captcha is answered. Requires a valid ReCaptcha # public/private key. # - enable_registration_captcha: False + #enable_registration_captcha: false # A secret key used to bypass the captcha test entirely. + # #captcha_bypass_secret: "YOUR_SECRET_HERE" # The API endpoint to use for verifying m.login.recaptcha responses. - recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" + # + #recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" """ diff --git a/synapse/config/database.py b/synapse/config/database.py index c8890147a6..3c27ed6b4a 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -49,7 +49,8 @@ class DatabaseConfig(Config): def default_config(self, data_dir_path, **kwargs): database_path = os.path.join(data_dir_path, "homeserver.db") return """\ - # Database configuration + ## Database ## + database: # The database engine name name: "sqlite3" @@ -59,7 +60,8 @@ class DatabaseConfig(Config): database: "%(database_path)s" # Number of events to cache in memory. - event_cache_size: "10K" + # + #event_cache_size: 10K """ % locals() def read_arguments(self, args): diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 46933a904c..e4be172a79 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -23,9 +23,9 @@ class GroupsConfig(Config): def default_config(self, **kwargs): return """\ - # Whether to allow non server admins to create groups on this server + # Uncomment to allow non-server-admin users to create groups on this server # - enable_group_creation: false + #enable_group_creation: true # If enabled, non server admins can only create groups with local parts # starting with this prefix diff --git a/synapse/config/key.py b/synapse/config/key.py index 35f05fa974..2bd5531acb 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -43,10 +43,16 @@ class KeyConfig(Config): config.get("old_signing_keys", {}) ) self.key_refresh_interval = self.parse_duration( - config["key_refresh_interval"] + config.get("key_refresh_interval", "1d"), ) self.perspectives = self.read_perspectives( - config["perspectives"] + config.get("perspectives", {}).get("servers", { + "matrix.org": {"verify_keys": { + "ed25519:auto": { + "key": "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw", + } + }} + }) ) self.macaroon_secret_key = config.get( @@ -88,7 +94,7 @@ class KeyConfig(Config): # Used to enable access token expiration. # - expire_access_token: False + #expire_access_token: False # a secret which is used to calculate HMACs for form values, to stop # falsification of values. Must be specified for the User Consent @@ -117,21 +123,21 @@ class KeyConfig(Config): # Determines how quickly servers will query to check which keys # are still valid. # - key_refresh_interval: "1d" # 1 Day. + #key_refresh_interval: 1d # The trusted servers to download signing keys from. # - perspectives: - servers: - "matrix.org": - verify_keys: - "ed25519:auto": - key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" + #perspectives: + # servers: + # "matrix.org": + # verify_keys: + # "ed25519:auto": + # key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" """ % locals() - def read_perspectives(self, perspectives_config): + def read_perspectives(self, perspectives_servers): servers = {} - for server_name, server_config in perspectives_config["servers"].items(): + for server_name, server_config in perspectives_servers.items(): for key_id, key_data in server_config["verify_keys"].items(): if is_signing_algorithm_supported(key_id): key_base64 = key_data["key"] diff --git a/synapse/config/logger.py b/synapse/config/logger.py index f6940b65fd..464c28c2d9 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -81,7 +81,9 @@ class LoggingConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") - return """ + return """\ + ## Logging ## + # A yaml python logging config file # log_config: "%(log_config)s" diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ed0498c634..2de51979d8 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -24,7 +24,7 @@ MISSING_SENTRY = ( class MetricsConfig(Config): def read_config(self, config): - self.enable_metrics = config["enable_metrics"] + self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -48,7 +48,7 @@ class MetricsConfig(Config): # Enable collection and rendering of performance metrics # - enable_metrics: False + #enable_metrics: False # Enable sentry integration # NOTE: While attempts are made to ensure that the logs don't contain diff --git a/synapse/config/password.py b/synapse/config/password.py index 2a52b9db54..eea59e772b 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -22,16 +22,21 @@ class PasswordConfig(Config): def read_config(self, config): password_config = config.get("password_config", {}) + if password_config is None: + password_config = {} + self.password_enabled = password_config.get("enabled", True) self.password_pepper = password_config.get("pepper", "") def default_config(self, config_dir_path, server_name, **kwargs): - return """ - # Enable password for login. - # + return """\ password_config: - enabled: true + # Uncomment to disable password login + # + #enabled: false + # Uncomment and change to a secret random string for extra security. # DO NOT CHANGE THIS AFTER INITIAL SETUP! - #pepper: "" + # + #pepper: "EVEN_MORE_SECRET" """ diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 54b71e6841..898a19dd8c 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -15,17 +15,32 @@ from ._base import Config +class RateLimitConfig(object): + def __init__(self, config): + self.per_second = config.get("per_second", 0.17) + self.burst_count = config.get("burst_count", 3.0) + + class RatelimitConfig(Config): def read_config(self, config): - self.rc_messages_per_second = config["rc_messages_per_second"] - self.rc_message_burst_count = config["rc_message_burst_count"] + self.rc_messages_per_second = config.get("rc_messages_per_second", 0.2) + self.rc_message_burst_count = config.get("rc_message_burst_count", 10.0) + + self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) - self.federation_rc_window_size = config["federation_rc_window_size"] - self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] - self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"] - self.federation_rc_reject_limit = config["federation_rc_reject_limit"] - self.federation_rc_concurrent = config["federation_rc_concurrent"] + rc_login_config = config.get("rc_login", {}) + self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) + self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.rc_login_failed_attempts = RateLimitConfig( + rc_login_config.get("failed_attempts", {}), + ) + + self.federation_rc_window_size = config.get("federation_rc_window_size", 1000) + self.federation_rc_sleep_limit = config.get("federation_rc_sleep_limit", 10) + self.federation_rc_sleep_delay = config.get("federation_rc_sleep_delay", 500) + self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) + self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) def default_config(self, **kwargs): return """\ @@ -33,33 +48,67 @@ class RatelimitConfig(Config): # Number of messages a client can send per second # - rc_messages_per_second: 0.2 + #rc_messages_per_second: 0.2 # Number of message a client can send before being throttled # - rc_message_burst_count: 10.0 + #rc_message_burst_count: 10.0 + + # Ratelimiting settings for registration and login. + # + # Each ratelimiting configuration is made of two parameters: + # - per_second: number of requests a client can send per second. + # - burst_count: number of requests a client can send before being throttled. + # + # Synapse currently uses the following configurations: + # - one for registration that ratelimits registration requests based on the + # client's IP address. + # - one for login that ratelimits login requests based on the client's IP + # address. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into, based on the amount of failed login + # attempts for this account. + # + # The defaults are as shown below. + # + #rc_registration: + # per_second: 0.17 + # burst_count: 3 + # + #rc_login: + # address: + # per_second: 0.17 + # burst_count: 3 + # account: + # per_second: 0.17 + # burst_count: 3 + # failed_attempts: + # per_second: 0.17 + # burst_count: 3 # The federation window size in milliseconds # - federation_rc_window_size: 1000 + #federation_rc_window_size: 1000 # The number of federation requests from a single server in a window # before the server will delay processing the request. # - federation_rc_sleep_limit: 10 + #federation_rc_sleep_limit: 10 # The duration in milliseconds to delay processing events from # remote servers by if they go over the sleep limit. # - federation_rc_sleep_delay: 500 + #federation_rc_sleep_delay: 500 # The maximum number of concurrent federation requests allowed # from a single server # - federation_rc_reject_limit: 50 + #federation_rc_reject_limit: 50 # The number of federation requests to concurrently process from a # single server # - federation_rc_concurrent: 3 + #federation_rc_concurrent: 3 """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 2881482f96..f6b2b9ceee 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,7 +24,7 @@ class RegistrationConfig(Config): def read_config(self, config): self.enable_registration = bool( - strtobool(str(config["enable_registration"])) + strtobool(str(config.get("enable_registration", False))) ) if "disable_registration" in config: self.enable_registration = not bool( @@ -36,7 +36,10 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) - self.trusted_third_party_id_servers = config["trusted_third_party_id_servers"] + self.trusted_third_party_id_servers = config.get( + "trusted_third_party_id_servers", + ["matrix.org", "vector.im"], + ) self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) @@ -64,9 +67,13 @@ class RegistrationConfig(Config): return """\ ## Registration ## + # + # Registration can be rate-limited using the parameters in the "Ratelimiting" + # section of this file. # Enable registration for new users. - enable_registration: False + # + #enable_registration: false # The user must provide all of the below types of 3PID when registering. # @@ -77,7 +84,7 @@ class RegistrationConfig(Config): # Explicitly disable asking for MSISDNs from the registration # flow (overrides registrations_require_3pid if MSISDNs are set as required) # - #disable_msisdn_registration: True + #disable_msisdn_registration: true # Mandate that users are only allowed to associate certain formats of # 3PIDs with accounts on this server. @@ -90,8 +97,8 @@ class RegistrationConfig(Config): # - medium: msisdn # pattern: '\\+44' - # If set, allows registration by anyone who also has the shared - # secret, even if registration is otherwise disabled. + # If set, allows registration of standard or admin accounts by anyone who + # has the shared secret, even if registration is otherwise disabled. # %(registration_shared_secret)s @@ -101,13 +108,13 @@ class RegistrationConfig(Config): # N.B. that increasing this will exponentially increase the time required # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. # - bcrypt_rounds: 12 + #bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and # participate in rooms hosted on this server which have been made # accessible to anonymous users. # - allow_guest_access: False + #allow_guest_access: false # The identity server which we suggest that clients should use when users log # in on this server. @@ -123,9 +130,9 @@ class RegistrationConfig(Config): # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # - trusted_third_party_id_servers: - - matrix.org - - vector.im + #trusted_third_party_id_servers: + # - matrix.org + # - vector.im # Users who register on this homeserver will automatically be joined # to these rooms @@ -139,7 +146,7 @@ class RegistrationConfig(Config): # Setting to false means that if the rooms are not manually created, # users cannot be auto-joined since they do not exist. # - autocreate_auto_join_rooms: true + #autocreate_auto_join_rooms: true """ % locals() def add_arguments(self, parser): diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 97db2a5b7a..3f34ad9b2a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -19,6 +19,36 @@ from synapse.util.module_loader import load_module from ._base import Config, ConfigError +DEFAULT_THUMBNAIL_SIZES = [ + { + "width": 32, + "height": 32, + "method": "crop", + }, { + "width": 96, + "height": 96, + "method": "crop", + }, { + "width": 320, + "height": 240, + "method": "scale", + }, { + "width": 640, + "height": 480, + "method": "scale", + }, { + "width": 800, + "height": 600, + "method": "scale" + }, +] + +THUMBNAIL_SIZE_YAML = """\ + # - width: %(width)i + # height: %(height)i + # method: %(method)s +""" + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." ) @@ -77,9 +107,9 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): def read_config(self, config): - self.max_upload_size = self.parse_size(config["max_upload_size"]) - self.max_image_pixels = self.parse_size(config["max_image_pixels"]) - self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M")) + self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) + self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) self.media_store_path = self.ensure_directory(config["media_store_path"]) @@ -139,9 +169,9 @@ class ContentRepositoryConfig(Config): ) self.uploads_path = self.ensure_directory(config["uploads_path"]) - self.dynamic_thumbnails = config["dynamic_thumbnails"] + self.dynamic_thumbnails = config.get("dynamic_thumbnails", False) self.thumbnail_requirements = parse_thumbnail_requirements( - config["thumbnail_sizes"] + config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES), ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: @@ -178,6 +208,13 @@ class ContentRepositoryConfig(Config): def default_config(self, data_dir_path, **kwargs): media_store = os.path.join(data_dir_path, "media_store") uploads_path = os.path.join(data_dir_path, "uploads") + + formatted_thumbnail_sizes = "".join( + THUMBNAIL_SIZE_YAML % s for s in DEFAULT_THUMBNAIL_SIZES + ) + # strip final NL + formatted_thumbnail_sizes = formatted_thumbnail_sizes[:-1] + return r""" # Directory where uploaded images and attachments are stored. # @@ -204,11 +241,11 @@ class ContentRepositoryConfig(Config): # The largest allowed upload size in bytes # - max_upload_size: "10M" + #max_upload_size: 10M # Maximum number of pixels that will be thumbnailed # - max_image_pixels: "32M" + #max_image_pixels: 32M # Whether to generate new thumbnails on the fly to precisely match # the resolution requested by the client. If true then whenever @@ -216,32 +253,18 @@ class ContentRepositoryConfig(Config): # generate a new thumbnail. If false the server will pick a thumbnail # from a precalculated list. # - dynamic_thumbnails: false + #dynamic_thumbnails: false # List of thumbnails to precalculate when an image is uploaded. # - thumbnail_sizes: - - width: 32 - height: 32 - method: crop - - width: 96 - height: 96 - method: crop - - width: 320 - height: 240 - method: scale - - width: 640 - height: 480 - method: scale - - width: 800 - height: 600 - method: scale + #thumbnail_sizes: +%(formatted_thumbnail_sizes)s # Is the preview URL API enabled? If enabled, you *must* specify # an explicit url_preview_ip_range_blacklist of IPs that the spider is # denied from accessing. # - url_preview_enabled: False + #url_preview_enabled: false # List of IP address CIDR ranges that the URL preview spider is denied # from accessing. There are no defaults: you must explicitly @@ -306,6 +329,6 @@ class ContentRepositoryConfig(Config): # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' # The largest allowed URL preview spidering size in bytes - max_spider_size: "10M" - + # + #max_spider_size: 10M """ % locals() diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index aff0a1f00c..39b9eb29c2 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -64,7 +64,7 @@ class SAML2Config(Config): } def default_config(self, config_dir_path, server_name, **kwargs): - return """ + return """\ # Enable SAML2 for registration and login. Uses pysaml2. # # `sp_config` is the configuration for the pysaml2 Service Provider. diff --git a/synapse/config/server.py b/synapse/config/server.py index 4200f10da3..499eb30bea 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -45,7 +45,7 @@ class ServerConfig(Config): self.pid_file = self.abspath(config.get("pid_file")) self.web_client_location = config.get("web_client_location", None) - self.soft_file_limit = config["soft_file_limit"] + self.soft_file_limit = config.get("soft_file_limit", 0) self.daemonize = config.get("daemonize") self.print_pidfile = config.get("print_pidfile") self.user_agent_suffix = config.get("user_agent_suffix") @@ -260,9 +260,11 @@ class ServerConfig(Config): # This is used by remote servers to connect to this server, # e.g. matrix.org, localhost:8080, etc. # This is also the last part of your UserID. + # server_name: "%(server_name)s" # When running as a daemon, the file to store the pid in + # pid_file: %(pid_file)s # CPU affinity mask. Setting this restricts the CPUs on which the @@ -304,10 +306,12 @@ class ServerConfig(Config): # Set the soft limit on the number of file descriptors synapse can use # Zero is used to indicate synapse should set the soft limit to the # hard limit. - soft_file_limit: 0 + # + #soft_file_limit: 0 # Set to false to disable presence tracking on this homeserver. - use_presence: true + # + #use_presence: false # The GC threshold parameters to pass to `gc.set_threshold`, if defined # diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 40045de7ac..f0014902da 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -181,6 +181,11 @@ class TlsConfig(Config): # See 'ACME support' below to enable auto-provisioning this certificate via # Let's Encrypt. # + # If supplying your own, be sure to use a `.pem` file that includes the + # full certificate chain including any intermediate certificates (for + # instance, if using certbot, use `fullchain.pem` as your certificate, + # not `cert.pem`). + # #tls_certificate_path: "%(tls_certificate_path)s" # PEM-encoded private key for TLS diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 257f7c86e7..2a1f005a37 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -22,7 +22,9 @@ class VoipConfig(Config): self.turn_shared_secret = config.get("turn_shared_secret") self.turn_username = config.get("turn_username") self.turn_password = config.get("turn_password") - self.turn_user_lifetime = self.parse_duration(config["turn_user_lifetime"]) + self.turn_user_lifetime = self.parse_duration( + config.get("turn_user_lifetime", "1h"), + ) self.turn_allow_guests = config.get("turn_allow_guests", True) def default_config(self, **kwargs): @@ -45,7 +47,7 @@ class VoipConfig(Config): # How long generated TURN credentials last # - turn_user_lifetime: "1h" + #turn_user_lifetime: 1h # Whether guests should be allowed to use the TURN server. # This defaults to True, otherwise VoIP will be unreliable for guests. @@ -53,5 +55,5 @@ class VoipConfig(Config): # connect to arbitrary endpoints without having first signed up for a # valid account (e.g. by passing a CAPTCHA). # - turn_allow_guests: True + #turn_allow_guests: True """ diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 80baf0ce0e..bfbd8b6c91 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -28,7 +28,7 @@ class WorkerConfig(Config): if self.worker_app == "synapse.app.homeserver": self.worker_app = None - self.worker_listeners = config.get("worker_listeners") + self.worker_listeners = config.get("worker_listeners", []) self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") @@ -48,6 +48,17 @@ class WorkerConfig(Config): self.worker_main_http_uri = config.get("worker_main_http_uri", None) self.worker_cpu_affinity = config.get("worker_cpu_affinity") + # This option is really only here to support `--manhole` command line + # argument. + manhole = config.get("worker_manhole") + if manhole: + self.worker_listeners.append({ + "port": manhole, + "bind_addresses": ["127.0.0.1"], + "type": "manhole", + "tls": False, + }) + if self.worker_listeners: for listener in self.worker_listeners: bind_address = listener.pop("bind_address", None) @@ -57,3 +68,18 @@ class WorkerConfig(Config): bind_addresses.append(bind_address) elif not bind_addresses: bind_addresses.append('') + + def read_arguments(self, args): + # We support a bunch of command line arguments that override options in + # the config. A lot of these options have a worker_* prefix when running + # on workers so we also have to override them when command line options + # are specified. + + if args.daemonize is not None: + self.worker_daemonize = args.daemonize + if args.log_config is not None: + self.worker_log_config = args.log_config + if args.log_file is not None: + self.worker_log_file = args.log_file + if args.manhole is not None: + self.worker_manhole = args.worker_manhole diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 7474fd515f..0207cd989a 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -686,9 +686,9 @@ def _handle_key_deferred(verify_request): try: with PreserveLoggingContext(): _, key_id, verify_key = yield verify_request.deferred - except (IOError, RequestSendFailed) as e: + except KeyLookupError as e: logger.warn( - "Got IOError when downloading keys for %s: %s %s", + "Failed to download keys for %s: %s %s", server_name, type(e).__name__, str(e), ) raise SynapseError( diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 20c1ab4203..fafa135182 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -77,6 +77,20 @@ class _EventInternalMetadata(object): """ return getattr(self, "recheck_redaction", False) + def is_soft_failed(self): + """Whether the event has been soft failed. + + Soft failed events should be handled as usual, except: + 1. They should not go down sync or event streams, or generally + sent to clients. + 2. They should not be added to the forward extremities (and + therefore not to current state). + + Returns: + bool + """ + return getattr(self, "soft_failed", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. @@ -127,7 +141,6 @@ class EventBase(object): origin = _event_dict_property("origin") origin_server_ts = _event_dict_property("origin_server_ts") prev_events = _event_dict_property("prev_events") - prev_state = _event_dict_property("prev_state") redacts = _event_dict_property("redacts") room_id = _event_dict_property("room_id") sender = _event_dict_property("sender") diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 569eb277a9..81f3b4b1ff 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -886,6 +886,9 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): def on_edu(self, edu_type, origin, content): """Overrides FederationHandlerRegistry """ + if not self.config.use_presence and edu_type == "m.presence": + return + handler = self.edu_handlers.get(edu_type) if handler: return super(ReplicationFederationHandlerRegistry, self).on_edu( diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 6f5995735a..04d04a4457 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -46,7 +46,7 @@ logger = logging.getLogger(__name__) class FederationRemoteSendQueue(object): - """A drop in replacement for TransactionQueue""" + """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname @@ -154,13 +154,17 @@ class FederationRemoteSendQueue(object): del self.device_messages[key] def notify_new_events(self, current_id): - """As per TransactionQueue""" + """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. pass - def send_edu(self, destination, edu_type, content, key=None): - """As per TransactionQueue""" + def build_and_send_edu(self, destination, edu_type, content, key=None): + """As per FederationSender""" + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + pos = self._next_pos() edu = Edu( @@ -179,8 +183,17 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() + def send_read_receipt(self, receipt): + """As per FederationSender + + Args: + receipt (synapse.types.ReadReceipt): + """ + # nothing to do here: the replication listener will handle it. + pass + def send_presence(self, states): - """As per TransactionQueue + """As per FederationSender Args: states (list(UserPresenceState)) @@ -197,7 +210,7 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() def send_device_messages(self, destination): - """As per TransactionQueue""" + """As per FederationSender""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() @@ -435,7 +448,7 @@ def process_rows_for_federation(transaction_queue, rows): transaction queue ready for sending to the relevant homeservers. Args: - transaction_queue (TransactionQueue) + transaction_queue (FederationSender) rows (list(synapse.replication.tcp.streams.FederationStreamRow)) """ @@ -465,15 +478,11 @@ def process_rows_for_federation(transaction_queue, rows): for destination, edu_map in iteritems(buff.keyed_edus): for key, edu in edu_map.items(): - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=key, - ) + transaction_queue.send_edu(edu, key) for destination, edu_list in iteritems(buff.edus): for edu in edu_list: - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=None, - ) + transaction_queue.send_edu(edu, None) for destination in buff.device_destinations: transaction_queue.send_device_messages(destination) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py new file mode 100644 index 0000000000..1bcc353d18 --- /dev/null +++ b/synapse/federation/sender/__init__.py @@ -0,0 +1,388 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import itervalues + +from prometheus_client import Counter + +from twisted.internet import defer + +import synapse.metrics +from synapse.federation.sender.per_destination_queue import PerDestinationQueue +from synapse.federation.sender.transaction_manager import TransactionManager +from synapse.federation.units import Edu +from synapse.handlers.presence import get_interested_remotes +from synapse.metrics import ( + LaterGauge, + event_processing_loop_counter, + event_processing_loop_room_count, + events_processed_counter, +) +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.util import logcontext +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + +sent_pdus_destination_dist_count = Counter( + "synapse_federation_client_sent_pdu_destinations:count", + "Number of PDUs queued for sending to one or more destinations", +) + +sent_pdus_destination_dist_total = Counter( + "synapse_federation_client_sent_pdu_destinations:total", "" + "Total number of PDUs queued for sending across all destinations", +) + + +class FederationSender(object): + def __init__(self, hs): + self.hs = hs + self.server_name = hs.hostname + + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + + self.clock = hs.get_clock() + self.is_mine_id = hs.is_mine_id + + self._transaction_manager = TransactionManager(hs) + + # map from destination to PerDestinationQueue + self._per_destination_queues = {} # type: dict[str, PerDestinationQueue] + + LaterGauge( + "synapse_federation_transaction_queue_pending_destinations", + "", + [], + lambda: sum( + 1 for d in self._per_destination_queues.values() + if d.transmission_loop_running + ), + ) + + # Map of user_id -> UserPresenceState for all the pending presence + # to be sent out by user_id. Entries here get processed and put in + # pending_presence_by_dest + self.pending_presence = {} + + LaterGauge( + "synapse_federation_transaction_queue_pending_pdus", + "", + [], + lambda: sum( + d.pending_pdu_count() for d in self._per_destination_queues.values() + ), + ) + LaterGauge( + "synapse_federation_transaction_queue_pending_edus", + "", + [], + lambda: sum( + d.pending_edu_count() for d in self._per_destination_queues.values() + ), + ) + + self._order = 1 + + self._is_processing = False + self._last_poked_id = -1 + + self._processing_pending_presence = False + + def _get_per_destination_queue(self, destination): + queue = self._per_destination_queues.get(destination) + if not queue: + queue = PerDestinationQueue(self.hs, self._transaction_manager, destination) + self._per_destination_queues[destination] = queue + return queue + + def notify_new_events(self, current_id): + """This gets called when we have some new events we might want to + send out to other servers. + """ + self._last_poked_id = max(current_id, self._last_poked_id) + + if self._is_processing: + return + + # fire off a processing loop in the background + run_as_background_process( + "process_event_queue_for_federation", + self._process_event_queue_loop, + ) + + @defer.inlineCallbacks + def _process_event_queue_loop(self): + try: + self._is_processing = True + while True: + last_token = yield self.store.get_federation_out_pos("events") + next_token, events = yield self.store.get_all_new_events_stream( + last_token, self._last_poked_id, limit=100, + ) + + logger.debug("Handling %s -> %s", last_token, next_token) + + if not events and next_token >= self._last_poked_id: + break + + @defer.inlineCallbacks + def handle_event(event): + # Only send events for this server. + send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() + is_mine = self.is_mine_id(event.sender) + if not is_mine and send_on_behalf_of is None: + return + + try: + # Get the state from before the event. + # We need to make sure that this is the state from before + # the event and not from after it. + # Otherwise if the last member on a server in a room is + # banned then it won't receive the event because it won't + # be in the room after the ban. + destinations = yield self.state.get_current_hosts_in_room( + event.room_id, latest_event_ids=event.prev_event_ids(), + ) + except Exception: + logger.exception( + "Failed to calculate hosts in room for event: %s", + event.event_id, + ) + return + + destinations = set(destinations) + + if send_on_behalf_of is not None: + # If we are sending the event on behalf of another server + # then it already has the event and there is no reason to + # send the event to it. + destinations.discard(send_on_behalf_of) + + logger.debug("Sending %s to %r", event, destinations) + + self._send_pdu(event, destinations) + + @defer.inlineCallbacks + def handle_room_events(events): + for event in events: + yield handle_event(event) + + events_by_room = {} + for event in events: + events_by_room.setdefault(event.room_id, []).append(event) + + yield logcontext.make_deferred_yieldable(defer.gatherResults( + [ + logcontext.run_in_background(handle_room_events, evs) + for evs in itervalues(events_by_room) + ], + consumeErrors=True + )) + + yield self.store.update_federation_out_pos( + "events", next_token + ) + + if events: + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + + synapse.metrics.event_processing_lag.labels( + "federation_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels( + "federation_sender").set(ts) + + events_processed_counter.inc(len(events)) + + event_processing_loop_room_count.labels( + "federation_sender" + ).inc(len(events_by_room)) + + event_processing_loop_counter.labels("federation_sender").inc() + + synapse.metrics.event_processing_positions.labels( + "federation_sender").set(next_token) + + finally: + self._is_processing = False + + def _send_pdu(self, pdu, destinations): + # We loop through all destinations to see whether we already have + # a transaction in progress. If we do, stick it in the pending_pdus + # table and we'll get back to it later. + + order = self._order + self._order += 1 + + destinations = set(destinations) + destinations.discard(self.server_name) + logger.debug("Sending to: %s", str(destinations)) + + if not destinations: + return + + sent_pdus_destination_dist_total.inc(len(destinations)) + sent_pdus_destination_dist_count.inc() + + for destination in destinations: + self._get_per_destination_queue(destination).send_pdu(pdu, order) + + @defer.inlineCallbacks + def send_read_receipt(self, receipt): + """Send a RR to any other servers in the room + + Args: + receipt (synapse.types.ReadReceipt): receipt to be sent + """ + # Work out which remote servers should be poked and poke them. + domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = [d for d in domains if d != self.server_name] + if not domains: + return + + logger.debug("Sending receipt to: %r", domains) + + content = { + receipt.room_id: { + receipt.receipt_type: { + receipt.user_id: { + "event_ids": receipt.event_ids, + "data": receipt.data, + }, + }, + }, + } + key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + + for domain in domains: + self.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content=content, + key=key, + ) + + @logcontext.preserve_fn # the caller should not yield on this + @defer.inlineCallbacks + def send_presence(self, states): + """Send the new presence states to the appropriate destinations. + + This actually queues up the presence states ready for sending and + triggers a background task to process them and send out the transactions. + + Args: + states (list(UserPresenceState)) + """ + if not self.hs.config.use_presence: + # No-op if presence is disabled. + return + + # First we queue up the new presence by user ID, so multiple presence + # updates in quick successtion are correctly handled + # We only want to send presence for our own users, so lets always just + # filter here just in case. + self.pending_presence.update({ + state.user_id: state for state in states + if self.is_mine_id(state.user_id) + }) + + # We then handle the new pending presence in batches, first figuring + # out the destinations we need to send each state to and then poking it + # to attempt a new transaction. We linearize this so that we don't + # accidentally mess up the ordering and send multiple presence updates + # in the wrong order + if self._processing_pending_presence: + return + + self._processing_pending_presence = True + try: + while True: + states_map = self.pending_presence + self.pending_presence = {} + + if not states_map: + break + + yield self._process_presence_inner(list(states_map.values())) + except Exception: + logger.exception("Error sending presence states to servers") + finally: + self._processing_pending_presence = False + + @measure_func("txnqueue._process_presence") + @defer.inlineCallbacks + def _process_presence_inner(self, states): + """Given a list of states populate self.pending_presence_by_dest and + poke to send a new transaction to each destination + + Args: + states (list(UserPresenceState)) + """ + hosts_and_states = yield get_interested_remotes(self.store, states, self.state) + + for destinations, states in hosts_and_states: + for destination in destinations: + if destination == self.server_name: + continue + self._get_per_destination_queue(destination).send_presence(states) + + def build_and_send_edu(self, destination, edu_type, content, key=None): + """Construct an Edu object, and queue it for sending + + Args: + destination (str): name of server to send to + edu_type (str): type of EDU to send + content (dict): content of EDU + key (Any|None): clobbering key for this edu + """ + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + + edu = Edu( + origin=self.server_name, + destination=destination, + edu_type=edu_type, + content=content, + ) + + self.send_edu(edu, key) + + def send_edu(self, edu, key): + """Queue an EDU for sending + + Args: + edu (Edu): edu to send + key (Any|None): clobbering key for this edu + """ + queue = self._get_per_destination_queue(edu.destination) + if key: + queue.send_keyed_edu(edu, key) + else: + queue.send_edu(edu) + + def send_device_messages(self, destination): + if destination == self.server_name: + logger.info("Not sending device update to ourselves") + return + + self._get_per_destination_queue(destination).attempt_new_transaction() + + def get_current_token(self): + return 0 diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py new file mode 100644 index 0000000000..385039add4 --- /dev/null +++ b/synapse/federation/sender/per_destination_queue.py @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import datetime +import logging + +from prometheus_client import Counter + +from twisted.internet import defer + +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + RequestSendFailed, +) +from synapse.events import EventBase +from synapse.federation.units import Edu +from synapse.handlers.presence import format_user_presence_state +from synapse.metrics import sent_transactions_counter +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage import UserPresenceState +from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter + +logger = logging.getLogger(__name__) + + +sent_edus_counter = Counter( + "synapse_federation_client_sent_edus", + "Total number of EDUs successfully sent", +) + +sent_edus_by_type = Counter( + "synapse_federation_client_sent_edus_by_type", + "Number of sent EDUs successfully sent, by event type", + ["type"], +) + + +class PerDestinationQueue(object): + """ + Manages the per-destination transmission queues. + + Args: + hs (synapse.HomeServer): + transaction_sender (TransactionManager): + destination (str): the server_name of the destination that we are managing + transmission for. + """ + def __init__(self, hs, transaction_manager, destination): + self._server_name = hs.hostname + self._clock = hs.get_clock() + self._store = hs.get_datastore() + self._transaction_manager = transaction_manager + + self._destination = destination + self.transmission_loop_running = False + + # a list of tuples of (pending pdu, order) + self._pending_pdus = [] # type: list[tuple[EventBase, int]] + self._pending_edus = [] # type: list[Edu] + + # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered + # based on their key (e.g. typing events by room_id) + # Map of (edu_type, key) -> Edu + self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] + + # Map of user_id -> UserPresenceState of pending presence to be sent to this + # destination + self._pending_presence = {} # type: dict[str, UserPresenceState] + + # stream_id of last successfully sent to-device message. + # NB: may be a long or an int. + self._last_device_stream_id = 0 + + # stream_id of last successfully sent device list update. + self._last_device_list_stream_id = 0 + + def pending_pdu_count(self): + return len(self._pending_pdus) + + def pending_edu_count(self): + return ( + len(self._pending_edus) + + len(self._pending_presence) + + len(self._pending_edus_keyed) + ) + + def send_pdu(self, pdu, order): + """Add a PDU to the queue, and start the transmission loop if neccessary + + Args: + pdu (EventBase): pdu to send + order (int): + """ + self._pending_pdus.append((pdu, order)) + self.attempt_new_transaction() + + def send_presence(self, states): + """Add presence updates to the queue. Start the transmission loop if neccessary. + + Args: + states (iterable[UserPresenceState]): presence to send + """ + self._pending_presence.update({ + state.user_id: state for state in states + }) + self.attempt_new_transaction() + + def send_keyed_edu(self, edu, key): + self._pending_edus_keyed[(edu.edu_type, key)] = edu + self.attempt_new_transaction() + + def send_edu(self, edu): + self._pending_edus.append(edu) + self.attempt_new_transaction() + + def attempt_new_transaction(self): + """Try to start a new transaction to this destination + + If there is already a transaction in progress to this destination, + returns immediately. Otherwise kicks off the process of sending a + transaction in the background. + """ + # list of (pending_pdu, deferred, order) + if self.transmission_loop_running: + # XXX: this can get stuck on by a never-ending + # request at which point pending_pdus just keeps growing. + # we need application-layer timeouts of some flavour of these + # requests + logger.debug( + "TX [%s] Transaction already in progress", + self._destination + ) + return + + logger.debug("TX [%s] Starting transaction loop", self._destination) + + run_as_background_process( + "federation_transaction_transmission_loop", + self._transaction_transmission_loop, + ) + + @defer.inlineCallbacks + def _transaction_transmission_loop(self): + pending_pdus = [] + try: + self.transmission_loop_running = True + + # This will throw if we wouldn't retry. We do this here so we fail + # quickly, but we will later check this again in the http client, + # hence why we throw the result away. + yield get_retry_limiter(self._destination, self._clock, self._store) + + pending_pdus = [] + while True: + device_message_edus, device_stream_id, dev_list_id = ( + yield self._get_new_device_messages() + ) + + # BEGIN CRITICAL SECTION + # + # In order to avoid a race condition, we need to make sure that + # the following code (from popping the queues up to the point + # where we decide if we actually have any pending messages) is + # atomic - otherwise new PDUs or EDUs might arrive in the + # meantime, but not get sent because we hold the + # transmission_loop_running flag. + + pending_pdus = self._pending_pdus + + # We can only include at most 50 PDUs per transactions + pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] + + pending_edus = self._pending_edus + + # We can only include at most 100 EDUs per transactions + pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] + + pending_edus.extend( + self._pending_edus_keyed.values() + ) + + self._pending_edus_keyed = {} + + pending_edus.extend(device_message_edus) + + pending_presence = self._pending_presence + self._pending_presence = {} + if pending_presence: + pending_edus.append( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.presence", + content={ + "push": [ + format_user_presence_state( + presence, self._clock.time_msec() + ) + for presence in pending_presence.values() + ] + }, + ) + ) + + if pending_pdus: + logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", + self._destination, len(pending_pdus)) + + if not pending_pdus and not pending_edus: + logger.debug("TX [%s] Nothing to send", self._destination) + self._last_device_stream_id = device_stream_id + return + + # END CRITICAL SECTION + + success = yield self._transaction_manager.send_new_transaction( + self._destination, pending_pdus, pending_edus + ) + if success: + sent_transactions_counter.inc() + sent_edus_counter.inc(len(pending_edus)) + for edu in pending_edus: + sent_edus_by_type.labels(edu.edu_type).inc() + # Remove the acknowledged device messages from the database + # Only bother if we actually sent some device messages + if device_message_edus: + yield self._store.delete_device_msgs_for_remote( + self._destination, device_stream_id + ) + logger.info( + "Marking as sent %r %r", self._destination, dev_list_id + ) + yield self._store.mark_as_sent_devices_by_remote( + self._destination, dev_list_id + ) + + self._last_device_stream_id = device_stream_id + self._last_device_list_stream_id = dev_list_id + else: + break + except NotRetryingDestination as e: + logger.debug( + "TX [%s] not ready for retry yet (next retry at %s) - " + "dropping transaction for now", + self._destination, + datetime.datetime.fromtimestamp( + (e.retry_last_ts + e.retry_interval) / 1000.0 + ), + ) + except FederationDeniedError as e: + logger.info(e) + except HttpResponseException as e: + logger.warning( + "TX [%s] Received %d response to transaction: %s", + self._destination, e.code, e, + ) + except RequestSendFailed as e: + logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) + + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + except Exception: + logger.exception( + "TX [%s] Failed to send transaction", + self._destination, + ) + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + finally: + # We want to be *very* sure we clear this after we stop processing + self.transmission_loop_running = False + + @defer.inlineCallbacks + def _get_new_device_messages(self): + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id + ) + edus = [ + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.direct_to_device", + content=content, + ) + for content in contents + ] + + last_device_list = self._last_device_list_stream_id + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list + ) + edus.extend( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.device_list_update", + content=content, + ) + for content in results + ) + defer.returnValue((edus, stream_id, now_stream_id)) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py new file mode 100644 index 0000000000..35e6b8ff5b --- /dev/null +++ b/synapse/federation/sender/transaction_manager.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.api.errors import HttpResponseException +from synapse.federation.persistence import TransactionActions +from synapse.federation.units import Transaction +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + + +class TransactionManager(object): + """Helper class which handles building and sending transactions + + shared between PerDestinationQueue objects + """ + def __init__(self, hs): + self._server_name = hs.hostname + self.clock = hs.get_clock() # nb must be called this for @measure_func + self._store = hs.get_datastore() + self._transaction_actions = TransactionActions(self._store) + self._transport_layer = hs.get_federation_transport_client() + + # HACK to get unique tx id + self._next_txn_id = int(self.clock.time_msec()) + + @measure_func("_send_new_transaction") + @defer.inlineCallbacks + def send_new_transaction(self, destination, pending_pdus, pending_edus): + + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[1]) + pdus = [x[0] for x in pending_pdus] + edus = pending_edus + + success = True + + logger.debug("TX [%s] _attempt_new_transaction", destination) + + txn_id = str(self._next_txn_id) + + logger.debug( + "TX [%s] {%s} Attempting new transaction" + " (pdus: %d, edus: %d)", + destination, txn_id, + len(pdus), + len(edus), + ) + + logger.debug("TX [%s] Persisting transaction...", destination) + + transaction = Transaction.create_new( + origin_server_ts=int(self.clock.time_msec()), + transaction_id=txn_id, + origin=self._server_name, + destination=destination, + pdus=pdus, + edus=edus, + ) + + self._next_txn_id += 1 + + yield self._transaction_actions.prepare_to_send(transaction) + + logger.debug("TX [%s] Persisted transaction", destination) + logger.info( + "TX [%s] {%s} Sending transaction [%s]," + " (PDUs: %d, EDUs: %d)", + destination, txn_id, + transaction.transaction_id, + len(pdus), + len(edus), + ) + + # Actually send the transaction + + # FIXME (erikj): This is a bit of a hack to make the Pdu age + # keys work + def json_data_cb(): + data = transaction.get_dict() + now = int(self.clock.time_msec()) + if "pdus" in data: + for p in data["pdus"]: + if "age_ts" in p: + unsigned = p.setdefault("unsigned", {}) + unsigned["age"] = now - int(p["age_ts"]) + del p["age_ts"] + return data + + try: + response = yield self._transport_layer.send_transaction( + transaction, json_data_cb + ) + code = 200 + except HttpResponseException as e: + code = e.code + response = e.response + + if e.code in (401, 404, 429) or 500 <= e.code: + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + raise e + + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + + yield self._transaction_actions.delivered( + transaction, code, response + ) + + logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) + + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, txn_id, e_id, r, + ) + else: + for p in pdus: + logger.warn( + "TX [%s] {%s} Failed to send event %s", + destination, txn_id, p.event_id, + ) + success = False + + defer.returnValue(success) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py deleted file mode 100644 index 30941f5ad6..0000000000 --- a/synapse/federation/transaction_queue.py +++ /dev/null @@ -1,699 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import datetime -import logging - -from six import itervalues - -from prometheus_client import Counter - -from twisted.internet import defer - -import synapse.metrics -from synapse.api.errors import ( - FederationDeniedError, - HttpResponseException, - RequestSendFailed, -) -from synapse.handlers.presence import format_user_presence_state, get_interested_remotes -from synapse.metrics import ( - LaterGauge, - event_processing_loop_counter, - event_processing_loop_room_count, - events_processed_counter, - sent_transactions_counter, -) -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.util import logcontext -from synapse.util.metrics import measure_func -from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter - -from .persistence import TransactionActions -from .units import Edu, Transaction - -logger = logging.getLogger(__name__) - -sent_pdus_destination_dist_count = Counter( - "synapse_federation_client_sent_pdu_destinations:count", - "Number of PDUs queued for sending to one or more destinations", -) - -sent_pdus_destination_dist_total = Counter( - "synapse_federation_client_sent_pdu_destinations:total", "" - "Total number of PDUs queued for sending across all destinations", -) - -sent_edus_counter = Counter( - "synapse_federation_client_sent_edus", - "Total number of EDUs successfully sent", -) - -sent_edus_by_type = Counter( - "synapse_federation_client_sent_edus_by_type", - "Number of sent EDUs successfully sent, by event type", - ["type"], -) - - -class TransactionQueue(object): - """This class makes sure we only have one transaction in flight at - a time for a given destination. - - It batches pending PDUs into single transactions. - """ - - def __init__(self, hs): - self.hs = hs - self.server_name = hs.hostname - - self.store = hs.get_datastore() - self.state = hs.get_state_handler() - self.transaction_actions = TransactionActions(self.store) - - self.transport_layer = hs.get_federation_transport_client() - - self.clock = hs.get_clock() - self.is_mine_id = hs.is_mine_id - - # Is a mapping from destinations -> deferreds. Used to keep track - # of which destinations have transactions in flight and when they are - # done - self.pending_transactions = {} - - LaterGauge( - "synapse_federation_transaction_queue_pending_destinations", - "", - [], - lambda: len(self.pending_transactions), - ) - - # Is a mapping from destination -> list of - # tuple(pending pdus, deferred, order) - self.pending_pdus_by_dest = pdus = {} - # destination -> list of tuple(edu, deferred) - self.pending_edus_by_dest = edus = {} - - # Map of user_id -> UserPresenceState for all the pending presence - # to be sent out by user_id. Entries here get processed and put in - # pending_presence_by_dest - self.pending_presence = {} - - # Map of destination -> user_id -> UserPresenceState of pending presence - # to be sent to each destinations - self.pending_presence_by_dest = presence = {} - - # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered - # based on their key (e.g. typing events by room_id) - # Map of destination -> (edu_type, key) -> Edu - self.pending_edus_keyed_by_dest = edus_keyed = {} - - LaterGauge( - "synapse_federation_transaction_queue_pending_pdus", - "", - [], - lambda: sum(map(len, pdus.values())), - ) - LaterGauge( - "synapse_federation_transaction_queue_pending_edus", - "", - [], - lambda: ( - sum(map(len, edus.values())) - + sum(map(len, presence.values())) - + sum(map(len, edus_keyed.values())) - ), - ) - - # destination -> stream_id of last successfully sent to-device message. - # NB: may be a long or an int. - self.last_device_stream_id_by_dest = {} - - # destination -> stream_id of last successfully sent device list - # update. - self.last_device_list_stream_id_by_dest = {} - - # HACK to get unique tx id - self._next_txn_id = int(self.clock.time_msec()) - - self._order = 1 - - self._is_processing = False - self._last_poked_id = -1 - - self._processing_pending_presence = False - - def notify_new_events(self, current_id): - """This gets called when we have some new events we might want to - send out to other servers. - """ - self._last_poked_id = max(current_id, self._last_poked_id) - - if self._is_processing: - return - - # fire off a processing loop in the background - run_as_background_process( - "process_event_queue_for_federation", - self._process_event_queue_loop, - ) - - @defer.inlineCallbacks - def _process_event_queue_loop(self): - try: - self._is_processing = True - while True: - last_token = yield self.store.get_federation_out_pos("events") - next_token, events = yield self.store.get_all_new_events_stream( - last_token, self._last_poked_id, limit=100, - ) - - logger.debug("Handling %s -> %s", last_token, next_token) - - if not events and next_token >= self._last_poked_id: - break - - @defer.inlineCallbacks - def handle_event(event): - # Only send events for this server. - send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() - is_mine = self.is_mine_id(event.sender) - if not is_mine and send_on_behalf_of is None: - return - - try: - # Get the state from before the event. - # We need to make sure that this is the state from before - # the event and not from after it. - # Otherwise if the last member on a server in a room is - # banned then it won't receive the event because it won't - # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids(), - ) - except Exception: - logger.exception( - "Failed to calculate hosts in room for event: %s", - event.event_id, - ) - return - - destinations = set(destinations) - - if send_on_behalf_of is not None: - # If we are sending the event on behalf of another server - # then it already has the event and there is no reason to - # send the event to it. - destinations.discard(send_on_behalf_of) - - logger.debug("Sending %s to %r", event, destinations) - - self._send_pdu(event, destinations) - - @defer.inlineCallbacks - def handle_room_events(events): - for event in events: - yield handle_event(event) - - events_by_room = {} - for event in events: - events_by_room.setdefault(event.room_id, []).append(event) - - yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.run_in_background(handle_room_events, evs) - for evs in itervalues(events_by_room) - ], - consumeErrors=True - )) - - yield self.store.update_federation_out_pos( - "events", next_token - ) - - if events: - now = self.clock.time_msec() - ts = yield self.store.get_received_ts(events[-1].event_id) - - synapse.metrics.event_processing_lag.labels( - "federation_sender").set(now - ts) - synapse.metrics.event_processing_last_ts.labels( - "federation_sender").set(ts) - - events_processed_counter.inc(len(events)) - - event_processing_loop_room_count.labels( - "federation_sender" - ).inc(len(events_by_room)) - - event_processing_loop_counter.labels("federation_sender").inc() - - synapse.metrics.event_processing_positions.labels( - "federation_sender").set(next_token) - - finally: - self._is_processing = False - - def _send_pdu(self, pdu, destinations): - # We loop through all destinations to see whether we already have - # a transaction in progress. If we do, stick it in the pending_pdus - # table and we'll get back to it later. - - order = self._order - self._order += 1 - - destinations = set(destinations) - destinations.discard(self.server_name) - logger.debug("Sending to: %s", str(destinations)) - - if not destinations: - return - - sent_pdus_destination_dist_total.inc(len(destinations)) - sent_pdus_destination_dist_count.inc() - - for destination in destinations: - self.pending_pdus_by_dest.setdefault(destination, []).append( - (pdu, order) - ) - - self._attempt_new_transaction(destination) - - @logcontext.preserve_fn # the caller should not yield on this - @defer.inlineCallbacks - def send_presence(self, states): - """Send the new presence states to the appropriate destinations. - - This actually queues up the presence states ready for sending and - triggers a background task to process them and send out the transactions. - - Args: - states (list(UserPresenceState)) - """ - if not self.hs.config.use_presence: - # No-op if presence is disabled. - return - - # First we queue up the new presence by user ID, so multiple presence - # updates in quick successtion are correctly handled - # We only want to send presence for our own users, so lets always just - # filter here just in case. - self.pending_presence.update({ - state.user_id: state for state in states - if self.is_mine_id(state.user_id) - }) - - # We then handle the new pending presence in batches, first figuring - # out the destinations we need to send each state to and then poking it - # to attempt a new transaction. We linearize this so that we don't - # accidentally mess up the ordering and send multiple presence updates - # in the wrong order - if self._processing_pending_presence: - return - - self._processing_pending_presence = True - try: - while True: - states_map = self.pending_presence - self.pending_presence = {} - - if not states_map: - break - - yield self._process_presence_inner(list(states_map.values())) - except Exception: - logger.exception("Error sending presence states to servers") - finally: - self._processing_pending_presence = False - - @measure_func("txnqueue._process_presence") - @defer.inlineCallbacks - def _process_presence_inner(self, states): - """Given a list of states populate self.pending_presence_by_dest and - poke to send a new transaction to each destination - - Args: - states (list(UserPresenceState)) - """ - hosts_and_states = yield get_interested_remotes(self.store, states, self.state) - - for destinations, states in hosts_and_states: - for destination in destinations: - if destination == self.server_name: - continue - - self.pending_presence_by_dest.setdefault( - destination, {} - ).update({ - state.user_id: state for state in states - }) - - self._attempt_new_transaction(destination) - - def send_edu(self, destination, edu_type, content, key=None): - edu = Edu( - origin=self.server_name, - destination=destination, - edu_type=edu_type, - content=content, - ) - - if destination == self.server_name: - logger.info("Not sending EDU to ourselves") - return - - if key: - self.pending_edus_keyed_by_dest.setdefault( - destination, {} - )[(edu.edu_type, key)] = edu - else: - self.pending_edus_by_dest.setdefault(destination, []).append(edu) - - self._attempt_new_transaction(destination) - - def send_device_messages(self, destination): - if destination == self.server_name: - logger.info("Not sending device update to ourselves") - return - - self._attempt_new_transaction(destination) - - def get_current_token(self): - return 0 - - def _attempt_new_transaction(self, destination): - """Try to start a new transaction to this destination - - If there is already a transaction in progress to this destination, - returns immediately. Otherwise kicks off the process of sending a - transaction in the background. - - Args: - destination (str): - - Returns: - None - """ - # list of (pending_pdu, deferred, order) - if destination in self.pending_transactions: - # XXX: pending_transactions can get stuck on by a never-ending - # request at which point pending_pdus_by_dest just keeps growing. - # we need application-layer timeouts of some flavour of these - # requests - logger.debug( - "TX [%s] Transaction already in progress", - destination - ) - return - - logger.debug("TX [%s] Starting transaction loop", destination) - - run_as_background_process( - "federation_transaction_transmission_loop", - self._transaction_transmission_loop, - destination, - ) - - @defer.inlineCallbacks - def _transaction_transmission_loop(self, destination): - pending_pdus = [] - try: - self.pending_transactions[destination] = 1 - - # This will throw if we wouldn't retry. We do this here so we fail - # quickly, but we will later check this again in the http client, - # hence why we throw the result away. - yield get_retry_limiter(destination, self.clock, self.store) - - pending_pdus = [] - while True: - device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages(destination) - ) - - # BEGIN CRITICAL SECTION - # - # In order to avoid a race condition, we need to make sure that - # the following code (from popping the queues up to the point - # where we decide if we actually have any pending messages) is - # atomic - otherwise new PDUs or EDUs might arrive in the - # meantime, but not get sent because we hold the - # pending_transactions flag. - - pending_pdus = self.pending_pdus_by_dest.pop(destination, []) - - # We can only include at most 50 PDUs per transactions - pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:] - if leftover_pdus: - self.pending_pdus_by_dest[destination] = leftover_pdus - - pending_edus = self.pending_edus_by_dest.pop(destination, []) - - # We can only include at most 100 EDUs per transactions - pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:] - if leftover_edus: - self.pending_edus_by_dest[destination] = leftover_edus - - pending_presence = self.pending_presence_by_dest.pop(destination, {}) - - pending_edus.extend( - self.pending_edus_keyed_by_dest.pop(destination, {}).values() - ) - - pending_edus.extend(device_message_edus) - if pending_presence: - pending_edus.append( - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.presence", - content={ - "push": [ - format_user_presence_state( - presence, self.clock.time_msec() - ) - for presence in pending_presence.values() - ] - }, - ) - ) - - if pending_pdus: - logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", - destination, len(pending_pdus)) - - if not pending_pdus and not pending_edus: - logger.debug("TX [%s] Nothing to send", destination) - self.last_device_stream_id_by_dest[destination] = ( - device_stream_id - ) - return - - # END CRITICAL SECTION - - success = yield self._send_new_transaction( - destination, pending_pdus, pending_edus, - ) - if success: - sent_transactions_counter.inc() - sent_edus_counter.inc(len(pending_edus)) - for edu in pending_edus: - sent_edus_by_type.labels(edu.edu_type).inc() - # Remove the acknowledged device messages from the database - # Only bother if we actually sent some device messages - if device_message_edus: - yield self.store.delete_device_msgs_for_remote( - destination, device_stream_id - ) - logger.info("Marking as sent %r %r", destination, dev_list_id) - yield self.store.mark_as_sent_devices_by_remote( - destination, dev_list_id - ) - - self.last_device_stream_id_by_dest[destination] = device_stream_id - self.last_device_list_stream_id_by_dest[destination] = dev_list_id - else: - break - except NotRetryingDestination as e: - logger.debug( - "TX [%s] not ready for retry yet (next retry at %s) - " - "dropping transaction for now", - destination, - datetime.datetime.fromtimestamp( - (e.retry_last_ts + e.retry_interval) / 1000.0 - ), - ) - except FederationDeniedError as e: - logger.info(e) - except HttpResponseException as e: - logger.warning( - "TX [%s] Received %d response to transaction: %s", - destination, e.code, e, - ) - except RequestSendFailed as e: - logger.warning("TX [%s] Failed to send transaction: %s", destination, e) - - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - destination) - except Exception: - logger.exception( - "TX [%s] Failed to send transaction", - destination, - ) - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - destination) - finally: - # We want to be *very* sure we delete this after we stop processing - self.pending_transactions.pop(destination, None) - - @defer.inlineCallbacks - def _get_new_device_messages(self, destination): - last_device_stream_id = self.last_device_stream_id_by_dest.get(destination, 0) - to_device_stream_id = self.store.get_to_device_stream_token() - contents, stream_id = yield self.store.get_new_device_msgs_for_remote( - destination, last_device_stream_id, to_device_stream_id - ) - edus = [ - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.direct_to_device", - content=content, - ) - for content in contents - ] - - last_device_list = self.last_device_list_stream_id_by_dest.get(destination, 0) - now_stream_id, results = yield self.store.get_devices_by_remote( - destination, last_device_list - ) - edus.extend( - Edu( - origin=self.server_name, - destination=destination, - edu_type="m.device_list_update", - content=content, - ) - for content in results - ) - defer.returnValue((edus, stream_id, now_stream_id)) - - @measure_func("_send_new_transaction") - @defer.inlineCallbacks - def _send_new_transaction(self, destination, pending_pdus, pending_edus): - - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[1]) - pdus = [x[0] for x in pending_pdus] - edus = pending_edus - - success = True - - logger.debug("TX [%s] _attempt_new_transaction", destination) - - txn_id = str(self._next_txn_id) - - logger.debug( - "TX [%s] {%s} Attempting new transaction" - " (pdus: %d, edus: %d)", - destination, txn_id, - len(pdus), - len(edus), - ) - - logger.debug("TX [%s] Persisting transaction...", destination) - - transaction = Transaction.create_new( - origin_server_ts=int(self.clock.time_msec()), - transaction_id=txn_id, - origin=self.server_name, - destination=destination, - pdus=pdus, - edus=edus, - ) - - self._next_txn_id += 1 - - yield self.transaction_actions.prepare_to_send(transaction) - - logger.debug("TX [%s] Persisted transaction", destination) - logger.info( - "TX [%s] {%s} Sending transaction [%s]," - " (PDUs: %d, EDUs: %d)", - destination, txn_id, - transaction.transaction_id, - len(pdus), - len(edus), - ) - - # Actually send the transaction - - # FIXME (erikj): This is a bit of a hack to make the Pdu age - # keys work - def json_data_cb(): - data = transaction.get_dict() - now = int(self.clock.time_msec()) - if "pdus" in data: - for p in data["pdus"]: - if "age_ts" in p: - unsigned = p.setdefault("unsigned", {}) - unsigned["age"] = now - int(p["age_ts"]) - del p["age_ts"] - return data - - try: - response = yield self.transport_layer.send_transaction( - transaction, json_data_cb - ) - code = 200 - except HttpResponseException as e: - code = e.code - response = e.response - - if e.code in (401, 404, 429) or 500 <= e.code: - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - raise e - - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - - yield self.transaction_actions.delivered( - transaction, code, response - ) - - logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) - - if code == 200: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: - logger.warn( - "TX [%s] {%s} Remote returned error for %s: %s", - destination, txn_id, e_id, r, - ) - else: - for p in pdus: - logger.warn( - "TX [%s] {%s} Failed to send event %s", - destination, txn_id, p.event_id, - ) - success = False - - defer.returnValue(success) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 5ba94be2ec..96d680a5ad 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -393,7 +393,7 @@ class FederationStateServlet(BaseFederationServlet): return self.handler.on_context_state_request( origin, context, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) @@ -404,7 +404,7 @@ class FederationStateIdsServlet(BaseFederationServlet): return self.handler.on_state_ids_request( origin, room_id, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) @@ -759,7 +759,7 @@ class FederationVersionServlet(BaseFederationServlet): class FederationGroupsProfileServlet(BaseFederationServlet): """Get/set the basic profile of a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/profile$" + PATH = "/groups/(?P<group_id>[^/]*)/profile" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -787,7 +787,7 @@ class FederationGroupsProfileServlet(BaseFederationServlet): class FederationGroupsSummaryServlet(BaseFederationServlet): - PATH = "/groups/(?P<group_id>[^/]*)/summary$" + PATH = "/groups/(?P<group_id>[^/]*)/summary" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -805,7 +805,7 @@ class FederationGroupsSummaryServlet(BaseFederationServlet): class FederationGroupsRoomsServlet(BaseFederationServlet): """Get the rooms in a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/rooms$" + PATH = "/groups/(?P<group_id>[^/]*)/rooms" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -823,7 +823,7 @@ class FederationGroupsRoomsServlet(BaseFederationServlet): class FederationGroupsAddRoomsServlet(BaseFederationServlet): """Add/remove room from group """ - PATH = "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)$" + PATH = "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, room_id): @@ -855,7 +855,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): """ PATH = ( "/groups/(?P<group_id>[^/]*)/room/(?P<room_id>[^/]*)" - "/config/(?P<config_key>[^/]*)$" + "/config/(?P<config_key>[^/]*)" ) @defer.inlineCallbacks @@ -874,7 +874,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): class FederationGroupsUsersServlet(BaseFederationServlet): """Get the users in a group on behalf of a user """ - PATH = "/groups/(?P<group_id>[^/]*)/users$" + PATH = "/groups/(?P<group_id>[^/]*)/users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -892,7 +892,7 @@ class FederationGroupsUsersServlet(BaseFederationServlet): class FederationGroupsInvitedUsersServlet(BaseFederationServlet): """Get the users that have been invited to a group """ - PATH = "/groups/(?P<group_id>[^/]*)/invited_users$" + PATH = "/groups/(?P<group_id>[^/]*)/invited_users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -910,7 +910,7 @@ class FederationGroupsInvitedUsersServlet(BaseFederationServlet): class FederationGroupsInviteServlet(BaseFederationServlet): """Ask a group server to invite someone to the group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -928,7 +928,7 @@ class FederationGroupsInviteServlet(BaseFederationServlet): class FederationGroupsAcceptInviteServlet(BaseFederationServlet): """Accept an invitation from the group server """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/accept_invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -945,7 +945,7 @@ class FederationGroupsAcceptInviteServlet(BaseFederationServlet): class FederationGroupsJoinServlet(BaseFederationServlet): """Attempt to join a group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/join" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -962,7 +962,7 @@ class FederationGroupsJoinServlet(BaseFederationServlet): class FederationGroupsRemoveUserServlet(BaseFederationServlet): """Leave or kick a user from the group """ - PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove$" + PATH = "/groups/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -980,7 +980,7 @@ class FederationGroupsRemoveUserServlet(BaseFederationServlet): class FederationGroupsLocalInviteServlet(BaseFederationServlet): """A group server has invited a local user """ - PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite$" + PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -997,7 +997,7 @@ class FederationGroupsLocalInviteServlet(BaseFederationServlet): class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): """A group server has removed a local user """ - PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove$" + PATH = "/groups/local/(?P<group_id>[^/]*)/users/(?P<user_id>[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1014,7 +1014,7 @@ class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): """A group or user's server renews their attestation """ - PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)$" + PATH = "/groups/(?P<group_id>[^/]*)/renew_attestation/(?P<user_id>[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1037,7 +1037,7 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): PATH = ( "/groups/(?P<group_id>[^/]*)/summary" "(/categories/(?P<category_id>[^/]+))?" - "/rooms/(?P<room_id>[^/]*)$" + "/rooms/(?P<room_id>[^/]*)" ) @defer.inlineCallbacks @@ -1080,7 +1080,7 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): """Get all categories for a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/$" + "/groups/(?P<group_id>[^/]*)/categories/" ) @defer.inlineCallbacks @@ -1100,7 +1100,7 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): """Add/remove/get a category in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/(?P<category_id>[^/]+)$" + "/groups/(?P<group_id>[^/]*)/categories/(?P<category_id>[^/]+)" ) @defer.inlineCallbacks @@ -1150,7 +1150,7 @@ class FederationGroupsRolesServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/$" + "/groups/(?P<group_id>[^/]*)/roles/" ) @defer.inlineCallbacks @@ -1170,7 +1170,7 @@ class FederationGroupsRoleServlet(BaseFederationServlet): """Add/remove/get a role in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/(?P<role_id>[^/]+)$" + "/groups/(?P<group_id>[^/]*)/roles/(?P<role_id>[^/]+)" ) @defer.inlineCallbacks @@ -1226,7 +1226,7 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): PATH = ( "/groups/(?P<group_id>[^/]*)/summary" "(/roles/(?P<role_id>[^/]+))?" - "/users/(?P<user_id>[^/]*)$" + "/users/(?P<user_id>[^/]*)" ) @defer.inlineCallbacks @@ -1269,7 +1269,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/get_groups_publicised$" + "/get_groups_publicised" ) @defer.inlineCallbacks @@ -1284,7 +1284,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): """Sets whether a group is joinable without an invite or knock """ - PATH = "/groups/(?P<group_id>[^/]*)/settings/m.join_policy$" + PATH = "/groups/(?P<group_id>[^/]*)/settings/m.join_policy" @defer.inlineCallbacks def on_PUT(self, origin, content, query, group_id): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 594754cfd8..d8d86d6ff3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -93,9 +93,9 @@ class BaseHandler(object): messages_per_second = self.hs.config.rc_messages_per_second burst_count = self.hs.config.rc_message_burst_count - allowed, time_allowed = self.ratelimiter.send_message( + allowed, time_allowed = self.ratelimiter.can_do_action( user_id, time_now, - msg_rate_hz=messages_per_second, + rate_hz=messages_per_second, burst_count=burst_count, update=update, ) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2abd9af94f..caad9ae2dd 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util import logcontext @@ -99,6 +100,11 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types + self._account_ratelimiter = Ratelimiter() + self._failed_attempts_ratelimiter = Ratelimiter() + + self._clock = self.hs.get_clock() + @defer.inlineCallbacks def validate_user_via_ui_auth(self, requester, request_body, clientip): """ @@ -568,7 +574,12 @@ class AuthHandler(BaseHandler): Returns: defer.Deferred: (unicode) canonical_user_id, or None if zero or multiple matches + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ + self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) if res is not None: defer.returnValue(res[0]) @@ -634,6 +645,8 @@ class AuthHandler(BaseHandler): StoreError if there was a problem accessing the database SynapseError if there was a problem with the request LoginError if there was an authentication problem. + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ if username.startswith('@'): @@ -643,6 +656,8 @@ class AuthHandler(BaseHandler): username, self.hs.hostname ).to_string() + self.ratelimit_login_per_account(qualified_user_id) + login_type = login_submission.get("type") known_login_type = False @@ -715,9 +730,16 @@ class AuthHandler(BaseHandler): if not known_login_type: raise SynapseError(400, "Unknown login type %s" % login_type) - # unknown username or invalid password. We raise a 403 here, but note - # that if we're doing user-interactive login, it turns all LoginErrors - # into a 401 anyway. + # unknown username or invalid password. + self._failed_attempts_ratelimiter.ratelimit( + qualified_user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + + # We raise a 403 here, but note that if we're doing user-interactive + # login, it turns all LoginErrors into a 401 anyway. raise LoginError( 403, "Invalid password", errcode=Codes.FORBIDDEN @@ -735,6 +757,10 @@ class AuthHandler(BaseHandler): password (unicode): the provided password Returns: (unicode) the canonical_user_id, or None if unknown user / bad password + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ lookupres = yield self._find_user_id_and_pwd_hash(user_id) if not lookupres: @@ -763,6 +789,7 @@ class AuthHandler(BaseHandler): auth_api.validate_macaroon(macaroon, "login", True, user_id) except Exception: raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + self.ratelimit_login_per_account(user_id) yield self.auth.check_auth_blocking(user_id) defer.returnValue(user_id) @@ -934,6 +961,33 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) + def ratelimit_login_per_account(self, user_id): + """Checks whether the process must be stopped because of ratelimiting. + + Checks against two ratelimiters: the generic one for login attempts per + account and the one specific to failed attempts. + + Args: + user_id (unicode): complete @user:id + + Raises: + LimitExceededError if one of the ratelimiters' login requests count + for this user is too high too proceed. + """ + self._failed_attempts_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + + self._account_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + update=True, + ) + @attr.s class MacaroonGenerator(object): diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c708c35d4d..b398848079 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,13 +37,185 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -class DeviceHandler(BaseHandler): +class DeviceWorkerHandler(BaseHandler): def __init__(self, hs): - super(DeviceHandler, self).__init__(hs) + super(DeviceWorkerHandler, self).__init__(hs) self.hs = hs self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """ + Retrieve the given user's devices + + Args: + user_id (str): + Returns: + defer.Deferred: list[dict[str, X]]: info on each device + """ + + device_map = yield self.store.get_devices_by_user(user_id) + + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id=None + ) + + devices = list(device_map.values()) + for device in devices: + _update_device_from_client_ips(device, ips) + + defer.returnValue(devices) + + @defer.inlineCallbacks + def get_device(self, user_id, device_id): + """ Retrieve the given device + + Args: + user_id (str): + device_id (str): + + Returns: + defer.Deferred: dict[str, X]: info on the device + Raises: + errors.NotFoundError: if the device was not found + """ + try: + device = yield self.store.get_device(user_id, device_id) + except errors.StoreError: + raise errors.NotFoundError + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id, + ) + _update_device_from_client_ips(device, ips) + defer.returnValue(device) + + @measure_func("device.get_user_ids_changed") + @defer.inlineCallbacks + def get_user_ids_changed(self, user_id, from_token): + """Get list of users that have had the devices updated, or have newly + joined a room, that `user_id` may be interested in. + + Args: + user_id (str) + from_token (StreamToken) + """ + now_room_key = yield self.store.get_room_events_max_id() + + room_ids = yield self.store.get_rooms_for_user(user_id) + + # First we check if any devices have changed + changed = yield self.store.get_user_whose_devices_changed( + from_token.device_list_key + ) + + # Then work out if any users have since joined + rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) + + member_events = yield self.store.get_membership_changes_for_user( + user_id, from_token.room_key, now_room_key, + ) + rooms_changed.update(event.room_id for event in member_events) + + stream_ordering = RoomStreamToken.parse_stream_token( + from_token.room_key + ).stream + + possibly_changed = set(changed) + possibly_left = set() + for room_id in rooms_changed: + current_state_ids = yield self.store.get_current_state_ids(room_id) + + # The user may have left the room + # TODO: Check if they actually did or if we were just invited. + if room_id not in room_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_left.add(state_key) + continue + + # Fetch the current state at the time. + try: + event_ids = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering=stream_ordering + ) + except errors.StoreError: + # we have purged the stream_ordering index since the stream + # ordering: treat it the same as a new room + event_ids = [] + + # special-case for an empty prev state: include all members + # in the changed list + if not event_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + continue + + current_member_id = current_state_ids.get((EventTypes.Member, user_id)) + if not current_member_id: + continue + + # mapping from event_id -> state_dict + prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) + + # Check if we've joined the room? If so we just blindly add all the users to + # the "possibly changed" users. + for state_dict in itervalues(prev_state_ids): + member_event = state_dict.get((EventTypes.Member, user_id), None) + if not member_event or member_event != current_member_id: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + break + + # If there has been any change in membership, include them in the + # possibly changed list. We'll check if they are joined below, + # and we're not toooo worried about spuriously adding users. + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + + # check if this member has changed since any of the extremities + # at the stream_ordering, and add them to the list if so. + for state_dict in itervalues(prev_state_ids): + prev_event_id = state_dict.get(key, None) + if not prev_event_id or prev_event_id != event_id: + if state_key != user_id: + possibly_changed.add(state_key) + break + + if possibly_changed or possibly_left: + users_who_share_room = yield self.store.get_users_who_share_room_with_user( + user_id + ) + + # Take the intersection of the users whose devices may have changed + # and those that actually still share a room with the user + possibly_joined = possibly_changed & users_who_share_room + possibly_left = (possibly_changed | possibly_left) - users_who_share_room + else: + possibly_joined = [] + possibly_left = [] + + defer.returnValue({ + "changed": list(possibly_joined), + "left": list(possibly_left), + }) + + +class DeviceHandler(DeviceWorkerHandler): + def __init__(self, hs): + super(DeviceHandler, self).__init__(hs) + self.federation_sender = hs.get_federation_sender() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -104,52 +276,6 @@ class DeviceHandler(BaseHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """ - Retrieve the given user's devices - - Args: - user_id (str): - Returns: - defer.Deferred: list[dict[str, X]]: info on each device - """ - - device_map = yield self.store.get_devices_by_user(user_id) - - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id=None - ) - - devices = list(device_map.values()) - for device in devices: - _update_device_from_client_ips(device, ips) - - defer.returnValue(devices) - - @defer.inlineCallbacks - def get_device(self, user_id, device_id): - """ Retrieve the given device - - Args: - user_id (str): - device_id (str): - - Returns: - defer.Deferred: dict[str, X]: info on the device - Raises: - errors.NotFoundError: if the device was not found - """ - try: - device = yield self.store.get_device(user_id, device_id) - except errors.StoreError: - raise errors.NotFoundError - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id, - ) - _update_device_from_client_ips(device, ips) - defer.returnValue(device) - - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """ Delete the given device @@ -276,6 +402,12 @@ class DeviceHandler(BaseHandler): user_id, device_ids, list(hosts) ) + for device_id in device_ids: + logger.debug( + "Notifying about update %r/%r, ID: %r", user_id, device_id, + position, + ) + room_ids = yield self.store.get_rooms_for_user(user_id) yield self.notifier.on_new_event( @@ -283,130 +415,10 @@ class DeviceHandler(BaseHandler): ) if hosts: - logger.info("Sending device list update notif to: %r", hosts) + logger.info("Sending device list update notif for %r to: %r", user_id, hosts) for host in hosts: self.federation_sender.send_device_messages(host) - @measure_func("device.get_user_ids_changed") - @defer.inlineCallbacks - def get_user_ids_changed(self, user_id, from_token): - """Get list of users that have had the devices updated, or have newly - joined a room, that `user_id` may be interested in. - - Args: - user_id (str) - from_token (StreamToken) - """ - now_token = yield self.hs.get_event_sources().get_current_token() - - room_ids = yield self.store.get_rooms_for_user(user_id) - - # First we check if any devices have changed - changed = yield self.store.get_user_whose_devices_changed( - from_token.device_list_key - ) - - # Then work out if any users have since joined - rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) - - member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key - ) - rooms_changed.update(event.room_id for event in member_events) - - stream_ordering = RoomStreamToken.parse_stream_token( - from_token.room_key - ).stream - - possibly_changed = set(changed) - possibly_left = set() - for room_id in rooms_changed: - current_state_ids = yield self.store.get_current_state_ids(room_id) - - # The user may have left the room - # TODO: Check if they actually did or if we were just invited. - if room_id not in room_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_left.add(state_key) - continue - - # Fetch the current state at the time. - try: - event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_ordering=stream_ordering - ) - except errors.StoreError: - # we have purged the stream_ordering index since the stream - # ordering: treat it the same as a new room - event_ids = [] - - # special-case for an empty prev state: include all members - # in the changed list - if not event_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - continue - - current_member_id = current_state_ids.get((EventTypes.Member, user_id)) - if not current_member_id: - continue - - # mapping from event_id -> state_dict - prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) - - # Check if we've joined the room? If so we just blindly add all the users to - # the "possibly changed" users. - for state_dict in itervalues(prev_state_ids): - member_event = state_dict.get((EventTypes.Member, user_id), None) - if not member_event or member_event != current_member_id: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - break - - # If there has been any change in membership, include them in the - # possibly changed list. We'll check if they are joined below, - # and we're not toooo worried about spuriously adding users. - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - - # check if this member has changed since any of the extremities - # at the stream_ordering, and add them to the list if so. - for state_dict in itervalues(prev_state_ids): - prev_event_id = state_dict.get(key, None) - if not prev_event_id or prev_event_id != event_id: - if state_key != user_id: - possibly_changed.add(state_key) - break - - if possibly_changed or possibly_left: - users_who_share_room = yield self.store.get_users_who_share_room_with_user( - user_id - ) - - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room - else: - possibly_joined = [] - possibly_left = [] - - defer.returnValue({ - "changed": list(possibly_joined), - "left": list(possibly_left), - }) - @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id) @@ -473,15 +485,26 @@ class DeviceListEduUpdater(object): if get_domain_from_id(user_id) != origin: # TODO: Raise? - logger.warning("Got device list update edu for %r from %r", user_id, origin) + logger.warning( + "Got device list update edu for %r/%r from %r", + user_id, device_id, origin, + ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. + logger.warning( + "Got device list update edu for %r/%r, but don't share a room", + user_id, device_id, + ) return + logger.debug( + "Received device list update for %r/%r", user_id, device_id, + ) + self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) @@ -499,10 +522,18 @@ class DeviceListEduUpdater(object): # This can happen since we batch updates return + for device_id, stream_id, prev_ids, content in pending_updates: + logger.debug( + "Handling update %r/%r, ID: %r, prev: %r ", + user_id, device_id, stream_id, prev_ids, + ) + # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) + logger.debug("Need to re-sync devices for %r? %r", user_id, resync) + if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) @@ -555,11 +586,21 @@ class DeviceListEduUpdater(object): ) devices = [] + for device in devices: + logger.debug( + "Handling resync update %r/%r, ID: %r", + user_id, device["device_id"], stream_id, + ) + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) + + # We clobber the seen updates since we've re-synced from a given + # point. + self._seen_updates[user_id] = set([stream_id]) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) @@ -572,9 +613,9 @@ class DeviceListEduUpdater(object): user_id, [device_id for device_id, _, _, _ in pending_updates] ) - self._seen_updates.setdefault(user_id, set()).update( - stream_id for _, stream_id, _, _ in pending_updates - ) + self._seen_updates.setdefault(user_id, set()).update( + stream_id for _, stream_id, _, _ in pending_updates + ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): @@ -587,6 +628,11 @@ class DeviceListEduUpdater(object): user_id ) + logger.debug( + "Current extremity for %r: %r", + user_id, extremity, + ) + stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f80486102a..9eaf2d3e18 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.event_auth import auth_types_for_event from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -858,6 +859,52 @@ class FederationHandler(BaseHandler): logger.debug("Not backfilling as no extremeties found.") return + # We only want to paginate if we can actually see the events we'll get, + # as otherwise we'll just spend a lot of resources to get redacted + # events. + # + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. + # + # *Note*: the spec wants us to keep backfilling until we reach the start + # of the room in case we are allowed to see some of the history. However + # in practice that causes more issues than its worth, as a) its + # relatively rare for there to be any visible history and b) even when + # there is its often sufficiently long ago that clients would stop + # attempting to paginate before backfill reached the visible history. + # + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. + # + # TODO: Correctly handle the case where we are allowed to see the + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. + + forward_events = yield self.store.get_successor_events( + list(extremities), + ) + + extremities_events = yield self.store.get_events( + forward_events, + check_redacted=False, + get_prev_content=False, + ) + + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. + filtered_extremities = yield filter_events_for_server( + self.store, self.server_name, list(extremities_events.values()), + redact=False, check_history_visibility_only=True, + ) + + if not filtered_extremities: + defer.returnValue(False) + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted( extremities.items(), @@ -1582,6 +1629,7 @@ class FederationHandler(BaseHandler): origin, event, state=state, auth_events=auth_events, + backfilled=backfilled, ) # reraise does not allow inlineCallbacks to preserve the stacktrace, so we @@ -1626,6 +1674,7 @@ class FederationHandler(BaseHandler): event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), + backfilled=backfilled, ) defer.returnValue(res) @@ -1748,7 +1797,7 @@ class FederationHandler(BaseHandler): ) @defer.inlineCallbacks - def _prep_event(self, origin, event, state=None, auth_events=None): + def _prep_event(self, origin, event, state, auth_events, backfilled): """ Args: @@ -1756,6 +1805,7 @@ class FederationHandler(BaseHandler): event: state: auth_events: + backfilled (bool) Returns: Deferred, which resolves to synapse.events.snapshot.EventContext @@ -1797,12 +1847,100 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + if not context.rejected: + yield self._check_for_soft_fail(event, state, backfilled) + if event.type == EventTypes.GuestAccess and not context.rejected: yield self.maybe_kick_guest_users(event) defer.returnValue(context) @defer.inlineCallbacks + def _check_for_soft_fail(self, event, state, backfilled): + """Checks if we should soft fail the event, if so marks the event as + such. + + Args: + event (FrozenEvent) + state (dict|None): The state at the event if we don't have all the + event's prev events + backfilled (bool): Whether the event is from backfill + + Returns: + Deferred + """ + # For new (non-backfilled and non-outlier) events we check if the event + # passes auth based on the current state. If it doesn't then we + # "soft-fail" the event. + do_soft_fail_check = not backfilled and not event.internal_metadata.is_outlier() + if do_soft_fail_check: + extrem_ids = yield self.store.get_latest_event_ids_in_room( + event.room_id, + ) + + extrem_ids = set(extrem_ids) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + do_soft_fail_check = False + + if do_soft_fail_check: + room_version = yield self.store.get_room_version(event.room_id) + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. + + state_sets = yield self.store.get_state_groups( + event.room_id, extrem_ids, + ) + state_sets = list(state_sets.values()) + state_sets.append(state) + current_state_ids = yield self.state_handler.resolve_events( + room_version, state_sets, event, + ) + current_state_ids = { + k: e.event_id for k, e in iteritems(current_state_ids) + } + else: + current_state_ids = yield self.state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(event) + current_state_ids = [ + e for k, e in iteritems(current_state_ids) + if k in auth_types + ] + + current_auth_events = yield self.store.get_events(current_state_ids) + current_auth_events = { + (e.type, e.state_key): e for e in current_auth_events.values() + } + + try: + self.auth.check(room_version, event, auth_events=current_auth_events) + except AuthError as e: + logger.warn( + "Failed current state auth resolution for %r because %s", + event, e, + ) + event.internal_metadata.soft_failed = True + + @defer.inlineCallbacks def on_query_auth(self, origin, event_id, room_id, remote_auth_chain, rejects, missing): in_room = yield self.auth.check_host_in_room( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3981fe69ce..c762b58902 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -436,10 +436,11 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) - logger.info( - "Not bothering to persist duplicate state event %s", event.event_id, - ) if prev_state is not None: + logger.info( + "Not bothering to persist state event %s duplicated by %s", + event.event_id, prev_state.event_id, + ) defer.returnValue(prev_state) yield self.handle_new_client_event( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ba3856674d..37e87fc054 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -816,7 +816,7 @@ class PresenceHandler(object): if self.is_mine(observed_user): yield self.invite_presence(observed_user, observer_user) else: - yield self.federation.send_edu( + yield self.federation.build_and_send_edu( destination=observed_user.domain, edu_type="m.presence_invite", content={ @@ -836,7 +836,7 @@ class PresenceHandler(object): if self.is_mine(observer_user): yield self.accept_presence(observed_user, observer_user) else: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence_accept", content={ @@ -848,7 +848,7 @@ class PresenceHandler(object): state_dict = yield self.get_state(observed_user, as_event=False) state_dict = format_user_presence_state(state_dict, self.clock.time_msec()) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence", content={ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 696469732c..dd783ae134 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,10 +16,8 @@ import logging from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import get_domain_from_id - -from ._base import BaseHandler +from synapse.handlers._base import BaseHandler +from synapse.types import ReadReceipt logger = logging.getLogger(__name__) @@ -39,42 +37,17 @@ class ReceiptsHandler(BaseHandler): self.state = hs.get_state_handler() @defer.inlineCallbacks - def received_client_receipt(self, room_id, receipt_type, user_id, - event_id): - """Called when a client tells us a local user has read up to the given - event_id in the room. - """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { - "ts": int(self.clock.time_msec()), - } - } - - is_new = yield self._handle_new_receipts([receipt]) - - if is_new: - # fire off a process in the background to send the receipt to - # remote servers - run_as_background_process( - 'push_receipts_to_remotes', self._push_remotes, receipt - ) - - @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): """Called when we receive an EDU of type m.receipt from a remote HS. """ receipts = [ - { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": user_values["event_ids"], - "data": user_values.get("data", {}), - } + ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=user_values["event_ids"], + data=user_values.get("data", {}), + ) for room_id, room_values in content.items() for receipt_type, users in room_values.items() for user_id, user_values in users.items() @@ -90,14 +63,12 @@ class ReceiptsHandler(BaseHandler): max_batch_id = None for receipt in receipts: - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - res = yield self.store.insert_receipt( - room_id, receipt_type, user_id, event_ids, data + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + receipt.event_ids, + receipt.data, ) if not res: @@ -115,7 +86,7 @@ class ReceiptsHandler(BaseHandler): # no new receipts defer.returnValue(False) - affected_room_ids = list(set([r["room_id"] for r in receipts])) + affected_room_ids = list(set([r.room_id for r in receipts])) self.notifier.on_new_event( "receipt_key", max_batch_id, rooms=affected_room_ids @@ -128,43 +99,26 @@ class ReceiptsHandler(BaseHandler): defer.returnValue(True) @defer.inlineCallbacks - def _push_remotes(self, receipt): - """Given a receipt, works out which remote servers should be - poked and pokes them. + def received_client_receipt(self, room_id, receipt_type, user_id, + event_id): + """Called when a client tells us a local user has read up to the given + event_id in the room. """ - try: - # TODO: optimise this to move some of the work to the workers. - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": event_ids, - "data": data, - } - } - }, - }, - key=(room_id, receipt_type, user_id), - ) - except Exception: - logger.exception("Error pushing receipts to remote servers") + receipt = ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=[event_id], + data={ + "ts": int(self.clock.time_msec()), + }, + ) + + is_new = yield self._handle_new_receipts([receipt]) + if not is_new: + return + + self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c0e06929bd..0ec16b1d2e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,6 +24,7 @@ from synapse.api.errors import ( AuthError, Codes, InvalidCaptchaError, + LimitExceededError, RegistrationError, SynapseError, ) @@ -60,6 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None @@ -149,6 +151,7 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, + address=None, ): """Registers a new client on the server. @@ -167,6 +170,7 @@ class RegistrationHandler(BaseHandler): api.constants.UserTypes, or None for a normal user. default_display_name (unicode|None): if set, the new user's displayname will be set to this. Defaults to 'localpart'. + address (str|None): the IP address used to perform the regitration. Returns: A tuple of (user_id, access_token). Raises: @@ -206,7 +210,7 @@ class RegistrationHandler(BaseHandler): token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -215,6 +219,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=default_display_name, admin=admin, user_type=user_type, + address=address, ) if self.hs.config.user_directory_search_all_users: @@ -238,12 +243,13 @@ class RegistrationHandler(BaseHandler): if default_display_name is None: default_display_name = localpart try: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, create_profile_with_displayname=default_display_name, + address=address, ) except SynapseError: # if user id is taken, just generate another @@ -337,7 +343,7 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, @@ -513,7 +519,7 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -590,10 +596,10 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - def _register_with_store(self, user_id, token=None, password_hash=None, - was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_displayname=None, admin=False, - user_type=None): + def register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None, address=None): """Register user in the datastore. Args: @@ -612,10 +618,26 @@ class RegistrationHandler(BaseHandler): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. Returns: Deferred """ + # Don't rate limit for app services + if appservice_id is None and address is not None: + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + if self.hs.config.worker_app: return self._register_client( user_id=user_id, @@ -627,6 +649,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=create_profile_with_displayname, admin=admin, user_type=user_type, + address=address, ) else: return self.store.register( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 190ea2c7b1..aead9e4608 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -232,6 +232,10 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id, ) + # Move over old push rules + self.store.move_push_rules_from_room_to_room_for_user( + predecessor["room_id"], room_id, user_id, + ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..57bb996245 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -39,6 +39,9 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Debug logger for https://github.com/matrix-org/synapse/issues/4422 +issue4422_logger = logging.getLogger("synapse.handler.sync.4422_debug") + # Counts the number of times we returned a non-empty sync. `type` is one of # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is @@ -962,6 +965,15 @@ class SyncHandler(object): yield self._generate_sync_entry_for_groups(sync_result_builder) + # debug for https://github.com/matrix-org/synapse/issues/4422 + for joined_room in sync_result_builder.joined: + room_id = joined_room.room_id + if room_id in newly_joined_rooms: + issue4422_logger.debug( + "Sync result for newly joined room %s: %r", + room_id, joined_room, + ) + defer.returnValue(SyncResult( presence=sync_result_builder.presence, account_data=sync_result_builder.account_data, @@ -1425,6 +1437,17 @@ class SyncHandler(object): old_mem_ev = yield self.store.get_event( old_mem_ev_id, allow_none=True ) + + # debug for #4422 + if has_join: + prev_membership = None + if old_mem_ev: + prev_membership = old_mem_ev.membership + issue4422_logger.debug( + "Previous membership for room %s with join: %s (event %s)", + room_id, prev_membership, old_mem_ev_id, + ) + if not old_mem_ev or old_mem_ev.membership != Membership.JOIN: newly_joined_rooms.append(room_id) @@ -1519,30 +1542,39 @@ class SyncHandler(object): for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) + newly_joined = room_id in newly_joined_rooms if room_entry: events, start_key = room_entry prev_batch_token = now_token.copy_and_replace("room_key", start_key) - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=events, - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, - since_token=None if room_id in newly_joined_rooms else since_token, + since_token=None if newly_joined else since_token, upto_token=prev_batch_token, - )) + ) else: - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=[], - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, since_token=since_token, upto_token=since_token, - )) + ) + + if newly_joined: + # debugging for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "RoomSyncResultBuilder events for newly joined room %s: %r", + room_id, entry.events, + ) + room_entries.append(entry) defer.returnValue((room_entries, invited, newly_joined_rooms, newly_left_rooms)) @@ -1663,6 +1695,13 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) + if newly_joined: + # debug for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "Timeline events after filtering in newly-joined room %s: %r", + room_id, batch, + ) + # When we join the room (or the client requests full_state), we should # send down any existing tags. Usually the user won't have tags in a # newly joined room, unless either a) they've joined before or b) the @@ -1894,15 +1933,34 @@ def _calculate_state( class SyncResultBuilder(object): - "Used to help build up a new SyncResult for a user" + """Used to help build up a new SyncResult for a user + + Attributes: + sync_config (SyncConfig) + full_state (bool) + since_token (StreamToken) + now_token (StreamToken) + joined_room_ids (list[str]) + + # The following mirror the fields in a sync response + presence (list) + account_data (list) + joined (list[JoinedSyncResult]) + invited (list[InvitedSyncResult]) + archived (list[ArchivedSyncResult]) + device (list) + groups (GroupsSyncResult|None) + to_device (list) + """ def __init__(self, sync_config, full_state, since_token, now_token, joined_room_ids): """ Args: - sync_config(SyncConfig) - full_state(bool): The full_state flag as specified by user - since_token(StreamToken): The token supplied by user, or None. - now_token(StreamToken): The token to sync up to. + sync_config (SyncConfig) + full_state (bool): The full_state flag as specified by user + since_token (StreamToken): The token supplied by user, or None. + now_token (StreamToken): The token to sync up to. + joined_room_ids (list[str]): List of rooms the user is joined to """ self.sync_config = sync_config self.full_state = full_state @@ -1930,8 +1988,8 @@ class RoomSyncResultBuilder(object): Args: room_id(str) rtype(str): One of `"joined"` or `"archived"` - events(list): List of events to include in the room, (more events - may be added when generating result). + events(list[FrozenEvent]): List of events to include in the room + (more events may be added when generating result). newly_joined(bool): If the user has newly joined the room full_state(bool): Whether the full state should be sent in result since_token(StreamToken): Earliest point to return events from, or None diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a61bbf9392..39df960c31 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -231,7 +231,7 @@ class TypingHandler(object): for domain in set(get_domain_from_id(u) for u in users): if domain != self.server_name: logger.debug("sending typing update to %s", domain) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.typing", content={ diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 283c6c1b81..7dc0e236e7 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -15,7 +15,7 @@ import logging -from six import iteritems +from six import iteritems, iterkeys from twisted.internet import defer @@ -38,18 +38,8 @@ class UserDirectoryHandler(object): world_readable or publically joinable room. We keep a database table up to date by streaming changes of the current state and recalculating whether users should be in the directory or not when necessary. - - For each user in the directory we also store a room_id which is public and that the - user is joined to. This allows us to ignore history_visibility and join_rules changes - for that user in all other public rooms, as we know they'll still be in at least - one public room. """ - INITIAL_ROOM_SLEEP_MS = 50 - INITIAL_ROOM_SLEEP_COUNT = 100 - INITIAL_ROOM_BATCH_SIZE = 100 - INITIAL_USER_SLEEP_MS = 10 - def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() @@ -59,15 +49,6 @@ class UserDirectoryHandler(object): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users - - # When start up for the first time we need to populate the user_directory. - # This is a set of user_id's we've inserted already - self.initially_handled_users = set() - self.initially_handled_users_in_public = set() - - self.initially_handled_users_share = set() - self.initially_handled_users_share_private_room = set() - # The current position in the current_state_delta stream self.pos = None @@ -130,7 +111,7 @@ class UserDirectoryHandler(object): # Support users are for diagnostics and should not appear in the user directory. if not is_support: yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None + user_id, profile.display_name, profile.avatar_url ) @defer.inlineCallbacks @@ -140,7 +121,6 @@ class UserDirectoryHandler(object): # FIXME(#3714): We should probably do this in the same worker as all # the other changes. yield self.store.remove_from_user_dir(user_id) - yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks def _unsafe_process(self): @@ -148,10 +128,9 @@ class UserDirectoryHandler(object): if self.pos is None: self.pos = yield self.store.get_user_directory_stream_pos() - # If still None then we need to do the initial fill of directory + # If still None then the initial background update hasn't happened yet if self.pos is None: - yield self._do_initial_spam() - self.pos = yield self.store.get_user_directory_stream_pos() + defer.returnValue(None) # Loop round handling deltas until we're up to date while True: @@ -173,149 +152,6 @@ class UserDirectoryHandler(object): yield self.store.update_user_directory_stream_pos(self.pos) @defer.inlineCallbacks - def _do_initial_spam(self): - """Populates the user_directory from the current state of the DB, used - when synapse first starts with user_directory support - """ - new_pos = yield self.store.get_max_stream_id_in_current_state_deltas() - - # Delete any existing entries just in case there are any - yield self.store.delete_all_from_user_dir() - - # We process by going through each existing room at a time. - room_ids = yield self.store.get_all_rooms() - - logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) - num_processed_rooms = 0 - - for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) - yield self._handle_initial_room(room_id) - num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - logger.info("Processed all rooms.") - - if self.search_all_users: - num_processed_users = 0 - user_ids = yield self.store.get_all_local_users() - logger.info( - "Doing initial update of user directory. %d users", len(user_ids) - ) - for user_id in user_ids: - # We add profiles for all users even if they don't match the - # include pattern, just in case we want to change it in future - logger.info( - "Handling user %d/%d", num_processed_users + 1, len(user_ids) - ) - yield self._handle_local_user(user_id) - num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) - - logger.info("Processed all users") - - self.initially_handled_users = None - self.initially_handled_users_in_public = None - self.initially_handled_users_share = None - self.initially_handled_users_share_private_room = None - - yield self.store.update_user_directory_stream_pos(new_pos) - - @defer.inlineCallbacks - def _handle_initial_room(self, room_id): - """Called when we initially fill out user_directory one room at a time - """ - is_in_room = yield self.store.is_host_joined(room_id, self.server_name) - if not is_in_room: - return - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - user_ids = set(users_with_profile) - unhandled_users = user_ids - self.initially_handled_users - - yield self.store.add_profiles_to_user_dir( - room_id, - {user_id: users_with_profile[user_id] for user_id in unhandled_users}, - ) - - self.initially_handled_users |= unhandled_users - - if is_public: - yield self.store.add_users_to_public_room( - room_id, user_ids=user_ids - self.initially_handled_users_in_public - ) - self.initially_handled_users_in_public |= user_ids - - # We now go and figure out the new users who share rooms with user entries - # We sleep aggressively here as otherwise it can starve resources. - # We also batch up inserts/updates, but try to avoid too many at once. - to_insert = set() - to_update = set() - count = 0 - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - for other_user_id in user_ids: - if user_id == other_user_id: - continue - - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - - user_set = (user_id, other_user_id) - - if user_set in self.initially_handled_users_share_private_room: - continue - - if user_set in self.initially_handled_users_share: - if is_public: - continue - to_update.add(user_set) - else: - to_insert.add(user_set) - - if is_public: - self.initially_handled_users_share.add(user_set) - else: - self.initially_handled_users_share_private_room.add(user_set) - - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert - ) - to_insert.clear() - - if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - to_insert.clear() - - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process """ @@ -356,6 +192,7 @@ class UserDirectoryHandler(object): user_ids = yield self.store.get_users_in_dir_due_to_room( room_id ) + for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return @@ -436,14 +273,20 @@ class UserDirectoryHandler(object): # ignore the change return - if change: - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in iteritems(users_with_profile): - yield self._handle_new_user(room_id, user_id, profile) - else: - users = yield self.store.get_users_in_public_due_to_room(room_id) - for user_id in users: - yield self._handle_remove_user(room_id, user_id) + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + # Remove every user from the sharing tables for that room. + for user_id in iterkeys(users_with_profile): + yield self.store.remove_user_who_share_room(user_id, room_id) + + # Then, re-add them to the tables. + # NOTE: this is not the most efficient method, as handle_new_user sets + # up local_user -> other_user and other_user_whos_local -> local_user, + # which when ran over an entire room, will result in the same values + # being added multiple times. The batching upserts shouldn't make this + # too bad, though. + for user_id, profile in iteritems(users_with_profile): + yield self._handle_new_user(room_id, user_id, profile) @defer.inlineCallbacks def _handle_local_user(self, user_id): @@ -457,7 +300,9 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -469,90 +314,49 @@ class UserDirectoryHandler(object): """ logger.debug("Adding new user to dir, %r", user_id) - row = yield self.store.get_user_in_directory(user_id) - if not row: - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id ) + # Now we update users who share rooms with users. + users_with_profile = yield self.state.get_current_user_in_room(room_id) if is_public: - row = yield self.store.get_user_in_public_room(user_id) - if not row: - yield self.store.add_users_to_public_room(room_id, [user_id]) + yield self.store.add_users_in_public_rooms(room_id, (user_id,)) else: - logger.debug("Not adding new user to public dir, %r", user_id) - - # Now we update users who share rooms with users. We do this by getting - # all the current users in the room and seeing which aren't already - # marked in the database as sharing with `user_id` + to_insert = set() - users_with_profile = yield self.state.get_current_user_in_room(room_id) + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): - to_insert = set() - to_update = set() + is_appservice = self.store.get_if_app_services_interested_in_user( + user_id + ) - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + # We don't care about appservice users. + if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id) and not is_appservice: - # Returns a map of other_user_id -> shared_private. We only need - # to update mappings if for users that either don't share a room - # already (aren't in the map) or, if the room is private, those that - # only share a public room. - user_ids_shared = yield self.store.get_users_who_share_room_from_dir( - user_id - ) + to_insert.add((user_id, other_user_id)) + # Next we need to update for every local user in the room for other_user_id in users_with_profile: if user_id == other_user_id: continue - shared_is_private = user_ids_shared.get(other_user_id) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((user_id, other_user_id)) - elif shared_is_private is None: - # This is the first time they both share a room - to_insert.add((user_id, other_user_id)) - - # Next we need to update for every local user in the room - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue - - is_appservice = self.store.get_if_app_services_interested_in_user( - other_user_id - ) - if self.is_mine_id(other_user_id) and not is_appservice: - shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id + is_appservice = self.store.get_if_app_services_interested_in_user( + other_user_id ) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((other_user_id, user_id)) - elif shared_is_private is None: - # This is the first time they both share a room + if self.is_mine_id(other_user_id) and not is_appservice: to_insert.add((other_user_id, user_id)) - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -562,84 +366,16 @@ class UserDirectoryHandler(object): room_id (str): room_id that user left or stopped being public that user_id (str) """ - logger.debug("Maybe removing user %r", user_id) + logger.debug("Removing user %r", user_id) - row = yield self.store.get_user_in_directory(user_id) - update_user_dir = row and row["room_id"] == room_id + # Remove user from sharing tables + yield self.store.remove_user_who_share_room(user_id, room_id) - row = yield self.store.get_user_in_public_room(user_id) - update_user_in_public = row and row["room_id"] == room_id + # Are they still in any rooms? If not, remove them entirely. + rooms_user_is_in = yield self.store.get_user_dir_rooms_user_is_in(user_id) - if update_user_in_public or update_user_dir: - # XXX: Make this faster? - rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - if not update_user_in_public and not update_user_dir: - break - - is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name - ) - - if not is_in_room: - continue - - if update_user_dir: - update_user_dir = False - yield self.store.update_user_in_user_dir(user_id, j_room_id) - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if update_user_in_public and is_public: - yield self.store.update_user_in_public_user_list(user_id, j_room_id) - update_user_in_public = False - - if update_user_dir: + if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) - elif update_user_in_public: - yield self.store.remove_from_user_in_public_room(user_id) - - # Now handle users_who_share_rooms. - - # Get a list of user tuples that were in the DB due to this room and - # users (this includes tuples where the other user matches `user_id`) - user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id - ) - - for user_id, other_user_id in user_tuples: - # For each user tuple get a list of rooms that they still share, - # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users( - user_id, other_user_id - ) - - # If they dont share a room anymore, remove the mapping - if not rooms: - yield self.store.remove_user_who_share_room(user_id, other_user_id) - continue - - found_public_share = None - for j_room_id in rooms: - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: - found_public_share = j_room_id - else: - found_public_share = None - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) - break - - if found_public_share: - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) @defer.inlineCallbacks def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): @@ -665,9 +401,7 @@ class UserDirectoryHandler(object): new_avatar = event.content.get("avatar_url") if prev_name != new_name or prev_avatar != new_avatar: - yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id - ) + yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 384d8a37a2..1334c630cc 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -68,9 +68,13 @@ class MatrixFederationAgent(object): TLS policy to use for fetching .well-known files. None to use a default (browser-like) implementation. - srv_resolver (SrvResolver|None): + _srv_resolver (SrvResolver|None): SRVResolver impl to use for looking up SRV records. None to use a default implementation. + + _well_known_cache (TTLCache|None): + TTLCache impl for storing cached well-known lookups. None to use a default + implementation. """ def __init__( diff --git a/synapse/notifier.py b/synapse/notifier.py index de02b1017e..ff589660da 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -178,8 +178,6 @@ class Notifier(object): self.remove_expired_streams, self.UNUSED_STREAM_EXPIRY_MS ) - self.replication_deferred = ObservableDeferred(defer.Deferred()) - # This is not a very cheap test to perform, but it's only executed # when rendering the metrics page, which is likely once per minute at # most when scraping it. @@ -205,7 +203,9 @@ class Notifier(object): def add_replication_callback(self, cb): """Add a callback that will be called when some new data is available. - Callback is not given any arguments. + Callback is not given any arguments. It should *not* return a Deferred - if + it needs to do any asynchronous work, a background thread should be started and + wrapped with run_as_background_process. """ self.replication_callbacks.append(cb) @@ -517,60 +517,5 @@ class Notifier(object): def notify_replication(self): """Notify the any replication listeners that there's a new event""" - with PreserveLoggingContext(): - deferred = self.replication_deferred - self.replication_deferred = ObservableDeferred(defer.Deferred()) - deferred.callback(None) - - # the callbacks may well outlast the current request, so we run - # them in the sentinel logcontext. - # - # (ideally it would be up to the callbacks to know if they were - # starting off background processes and drop the logcontext - # accordingly, but that requires more changes) - for cb in self.replication_callbacks: - cb() - - @defer.inlineCallbacks - def wait_for_replication(self, callback, timeout): - """Wait for an event to happen. - - Args: - callback: Gets called whenever an event happens. If this returns a - truthy value then ``wait_for_replication`` returns, otherwise - it waits for another event. - timeout: How many milliseconds to wait for callback return a truthy - value. - - Returns: - A deferred that resolves with the value returned by the callback. - """ - listener = _NotificationListener(None) - - end_time = self.clock.time_msec() + timeout - - while True: - listener.deferred = self.replication_deferred.observe() - result = yield callback() - if result: - break - - now = self.clock.time_msec() - if end_time <= now: - break - - listener.deferred = timeout_deferred( - listener.deferred, - timeout=(end_time - now) / 1000., - reactor=self.hs.get_reactor(), - ) - - try: - with PreserveLoggingContext(): - yield listener.deferred - except defer.TimeoutError: - break - except defer.CancelledError: - break - - defer.returnValue(result) + for cb in self.replication_callbacks: + cb() diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 1d27c9221f..912a5ac341 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -33,11 +33,12 @@ class ReplicationRegisterServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationRegisterServlet, self).__init__(hs) self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() @staticmethod def _serialize_payload( user_id, token, password_hash, was_guest, make_guest, appservice_id, - create_profile_with_displayname, admin, user_type, + create_profile_with_displayname, admin, user_type, address, ): """ Args: @@ -56,6 +57,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. """ return { "token": token, @@ -66,13 +68,14 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "create_profile_with_displayname": create_profile_with_displayname, "admin": admin, "user_type": user_type, + "address": address, } @defer.inlineCallbacks def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) - yield self.store.register( + yield self.registration_handler.register_with_store( user_id=user_id, token=content["token"], password_hash=content["password_hash"], @@ -82,6 +85,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): create_profile_with_displayname=content["create_profile_with_displayname"], admin=content["admin"], user_type=content["user_type"], + address=content["address"] ) defer.returnValue((200, {})) diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index 60641f1a49..5b8521c770 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -43,6 +43,8 @@ class SlavedClientIpStore(BaseSlavedStore): if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return + self.client_ip_last_seen.prefill(key, now) + self.hs.get_tcp_replication().send_user_ip( user_id, access_token, ip, user_agent, device_id, now ) diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py index 4f19fd35aa..4d59778863 100644 --- a/synapse/replication/slave/storage/deviceinbox.py +++ b/synapse/replication/slave/storage/deviceinbox.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage import DataStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.deviceinbox import DeviceInboxWorkerStore from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceInboxStore(BaseSlavedStore): +class SlavedDeviceInboxStore(DeviceInboxWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceInboxStore, self).__init__(db_conn, hs) self._device_inbox_id_gen = SlavedIdTracker( @@ -43,12 +42,6 @@ class SlavedDeviceInboxStore(BaseSlavedStore): expiry_ms=30 * 60 * 1000, ) - get_to_device_stream_token = __func__(DataStore.get_to_device_stream_token) - get_new_messages_for_device = __func__(DataStore.get_new_messages_for_device) - get_new_device_msgs_for_remote = __func__(DataStore.get_new_device_msgs_for_remote) - delete_messages_for_device = __func__(DataStore.delete_messages_for_device) - delete_device_msgs_for_remote = __func__(DataStore.delete_device_msgs_for_remote) - def stream_positions(self): result = super(SlavedDeviceInboxStore, self).stream_positions() result["to_device"] = self._device_inbox_id_gen.get_current_token() diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index ec2fd561cc..16c9a162c5 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage import DataStore -from synapse.storage.end_to_end_keys import EndToEndKeyStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.devices import DeviceWorkerStore +from synapse.storage.end_to_end_keys import EndToEndKeyWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceStore(BaseSlavedStore): +class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceStore, self).__init__(db_conn, hs) @@ -38,17 +37,6 @@ class SlavedDeviceStore(BaseSlavedStore): "DeviceListFederationStreamChangeCache", device_list_max, ) - get_device_stream_token = __func__(DataStore.get_device_stream_token) - get_user_whose_devices_changed = __func__(DataStore.get_user_whose_devices_changed) - get_devices_by_remote = __func__(DataStore.get_devices_by_remote) - _get_devices_by_remote_txn = __func__(DataStore._get_devices_by_remote_txn) - _get_e2e_device_keys_txn = __func__(DataStore._get_e2e_device_keys_txn) - mark_as_sent_devices_by_remote = __func__(DataStore.mark_as_sent_devices_by_remote) - _mark_as_sent_devices_by_remote_txn = ( - __func__(DataStore._mark_as_sent_devices_by_remote_txn) - ) - count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"] - def stream_positions(self): result = super(SlavedDeviceStore, self).stream_positions() result["device_lists"] = self._device_list_id_gen.get_current_token() @@ -58,14 +46,23 @@ class SlavedDeviceStore(BaseSlavedStore): if stream_name == "device_lists": self._device_list_id_gen.advance(token) for row in rows: - self._device_list_stream_cache.entity_has_changed( - row.user_id, token + self._invalidate_caches_for_devices( + token, row.user_id, row.destination, ) - - if row.destination: - self._device_list_federation_stream_cache.entity_has_changed( - row.destination, token - ) return super(SlavedDeviceStore, self).process_replication_rows( stream_name, token, rows ) + + def _invalidate_caches_for_devices(self, token, user_id, destination): + self._device_list_stream_cache.entity_has_changed( + user_id, token + ) + + if destination: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + + self._get_cached_devices_for_user.invalidate((user_id,)) + self._get_cached_user_device.invalidate_many((user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py index 92447b00d4..9e530defe0 100644 --- a/synapse/replication/slave/storage/presence.py +++ b/synapse/replication/slave/storage/presence.py @@ -54,8 +54,11 @@ class SlavedPresenceStore(BaseSlavedStore): def stream_positions(self): result = super(SlavedPresenceStore, self).stream_positions() - position = self._presence_id_gen.get_current_token() - result["presence"] = position + + if self.hs.config.use_presence: + position = self._presence_id_gen.get_current_token() + result["presence"] = position + return result def process_replication_rows(self, stream_name, token, rows): diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index f0200c1e98..45fc913c52 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -20,7 +20,7 @@ from ._slaved_id_tracker import SlavedIdTracker from .events import SlavedEventStore -class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): +class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): def __init__(self, db_conn, hs): self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 586dddb40b..e558f90e1a 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -39,7 +39,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): Accepts a handler that will be called when new data is available or data is required. """ - maxDelay = 5 # Try at least once every N seconds + maxDelay = 30 # Try at least once every N seconds def __init__(self, hs, client_name, handler): self.client_name = client_name @@ -54,7 +54,6 @@ class ReplicationClientFactory(ReconnectingClientFactory): def buildProtocol(self, addr): logger.info("Connected to replication: %r", addr) - self.resetDelay() return ClientReplicationStreamProtocol( self.client_name, self.server_name, self._clock, self.handler ) @@ -90,15 +89,18 @@ class ReplicationClientHandler(object): # Used for tests. self.awaiting_syncs = {} + # The factory used to create connections. + self.factory = None + def start_replication(self, hs): """Helper method to start a replication connection to the remote server using TCP. """ client_name = hs.config.worker_name - factory = ReplicationClientFactory(hs, client_name, self) + self.factory = ReplicationClientFactory(hs, client_name, self) host = hs.config.worker_replication_host port = hs.config.worker_replication_port - hs.get_reactor().connectTCP(host, port, factory) + hs.get_reactor().connectTCP(host, port, self.factory) def on_rdata(self, stream_name, token, rows): """Called when we get new replication data. By default this just pokes @@ -140,6 +142,7 @@ class ReplicationClientHandler(object): args["account_data"] = user_account_data elif room_account_data: args["account_data"] = room_account_data + return args def get_currently_syncing_users(self): @@ -204,3 +207,14 @@ class ReplicationClientHandler(object): for cmd in self.pending_commands: connection.send_command(cmd) self.pending_commands = [] + + def finished_connecting(self): + """Called when we have successfully subscribed and caught up to all + streams we're interested in. + """ + logger.info("Finished connecting to server") + + # We don't reset the delay any earlier as otherwise if there is a + # problem during start up we'll end up tight looping connecting to the + # server. + self.factory.resetDelay() diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 327556f6a1..2098c32a77 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -127,8 +127,11 @@ class RdataCommand(Command): class PositionCommand(Command): - """Sent by the client to tell the client the stream postition without + """Sent by the server to tell the client the stream postition without needing to send an RDATA. + + Sent to the client after all missing updates for a stream have been sent + to the client and they're now up to date. """ NAME = "POSITION" diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 429471c345..55630ba9a7 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -451,7 +451,7 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): @defer.inlineCallbacks def subscribe_to_stream(self, stream_name, token): - """Subscribe the remote to a streams. + """Subscribe the remote to a stream. This invloves checking if they've missed anything and sending those updates down if they have. During that time new updates for the stream @@ -478,11 +478,36 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # Now we can send any updates that came in while we were subscribing pending_rdata = self.pending_rdata.pop(stream_name, []) + updates = [] for token, update in pending_rdata: - # Only send updates newer than the current token - if token > current_token: + # If the token is null, it is part of a batch update. Batches + # are multiple updates that share a single token. To denote + # this, the token is set to None for all tokens in the batch + # except for the last. If we find a None token, we keep looking + # through tokens until we find one that is not None and then + # process all previous updates in the batch as if they had the + # final token. + if token is None: + # Store this update as part of a batch + updates.append(update) + continue + + if token <= current_token: + # This update or batch of updates is older than + # current_token, dismiss it + updates = [] + continue + + updates.append(update) + + # Send all updates that are part of this batch with the + # found token + for update in updates: self.send_command(RdataCommand(stream_name, token, update)) + # Clear stored updates + updates = [] + # They're now fully subscribed self.replication_streams.add(stream_name) except Exception as e: @@ -526,6 +551,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.server_name = server_name self.handler = handler + # Set of stream names that have been subscribe to, but haven't yet + # caught up with. This is used to track when the client has been fully + # connected to the remote. + self.streams_connecting = set() + # Map of stream to batched updates. See RdataCommand for info on how # batching works. self.pending_batches = {} @@ -548,6 +578,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # We've now finished connecting to so inform the client handler self.handler.update_connection(self) + # This will happen if we don't actually subscribe to any streams + if not self.streams_connecting: + self.handler.finished_connecting() + def on_SERVER(self, cmd): if cmd.data != self.server_name: logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) @@ -577,6 +611,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): return self.handler.on_rdata(stream_name, cmd.token, rows) def on_POSITION(self, cmd): + # When we get a `POSITION` command it means we've finished getting + # missing updates for the given stream, and are now up to date. + self.streams_connecting.discard(cmd.stream_name) + if not self.streams_connecting: + self.handler.finished_connecting() + return self.handler.on_position(cmd.stream_name, cmd.token) def on_SYNC(self, cmd): @@ -593,6 +633,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.id(), stream_name, token ) + self.streams_connecting.add(stream_name) + self.send_command(ReplicateCommand(stream_name, token)) def on_connection_closed(self): diff --git a/synapse/res/templates/notif.html b/synapse/res/templates/notif.html index 88b921ca9c..1a6c70b562 100644 --- a/synapse/res/templates/notif.html +++ b/synapse/res/templates/notif.html @@ -6,11 +6,11 @@ <img alt="" class="sender_avatar" src="{{ message.sender_avatar_url|mxc_to_http(32,32) }}" /> {% else %} {% if message.sender_hash % 3 == 0 %} - <img class="sender_avatar" src="https://vector.im/beta/img/76cfa6.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-1.png" /> {% elif message.sender_hash % 3 == 1 %} - <img class="sender_avatar" src="https://vector.im/beta/img/50e2c2.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-2.png" /> {% else %} - <img class="sender_avatar" src="https://vector.im/beta/img/f4c371.png" /> + <img class="sender_avatar" src="https://riot.im/img/external/avatar-3.png" /> {% endif %} {% endif %} {% endif %} diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html index fcdb3109fe..019506e5fb 100644 --- a/synapse/res/templates/notif_mail.html +++ b/synapse/res/templates/notif_mail.html @@ -19,7 +19,7 @@ </td> <td class="logo"> {% if app_name == "Riot" %} - <img src="http://matrix.org/img/riot-logo-email.png" width="83" height="83" alt="[Riot]"/> + <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/> {% elif app_name == "Vector" %} <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/> {% else %} diff --git a/synapse/res/templates/room.html b/synapse/res/templates/room.html index 723c222d25..b8525fef88 100644 --- a/synapse/res/templates/room.html +++ b/synapse/res/templates/room.html @@ -5,11 +5,11 @@ <img alt="" src="{{ room.avatar_url|mxc_to_http(48,48) }}" /> {% else %} {% if room.hash % 3 == 0 %} - <img alt="" src="https://vector.im/beta/img/76cfa6.png" /> + <img alt="" src="https://riot.im/img/external/avatar-1.png" /> {% elif room.hash % 3 == 1 %} - <img alt="" src="https://vector.im/beta/img/50e2c2.png" /> + <img alt="" src="https://riot.im/img/external/avatar-2.png" /> {% else %} - <img alt="" src="https://vector.im/beta/img/f4c371.png" /> + <img alt="" src="https://riot.im/img/external/avatar-3.png" /> {% endif %} {% endif %} </td> diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 82433a2aa9..2a29f0c2af 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,12 +17,14 @@ import hashlib import hmac import logging +import platform from six import text_type from six.moves import http_client from twisted.internet import defer +import synapse from synapse.api.constants import Membership, UserTypes from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( @@ -32,6 +34,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.types import UserID, create_requester +from synapse.util.versionstring import get_version_string from .base import ClientV1RestServlet, client_path_patterns @@ -66,6 +69,25 @@ class UsersRestServlet(ClientV1RestServlet): defer.returnValue((200, ret)) +class VersionServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns("/admin/server_version") + + @defer.inlineCallbacks + def on_GET(self, request): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + ret = { + 'server_version': get_version_string(synapse), + 'python_version': platform.python_version(), + } + + defer.returnValue((200, ret)) + + class UserRegisterServlet(ClientV1RestServlet): """ Attributes: @@ -466,17 +488,6 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - yield self.event_creation_handler.create_and_send_nonmember_event( - room_creator_requester, - { - "type": "m.room.message", - "content": {"body": message, "msgtype": "m.text"}, - "room_id": new_room_id, - "sender": new_room_user_id, - }, - ratelimit=False, - ) - requester_user_id = requester.user.to_string() logger.info("Shutting down room %r", room_id) @@ -514,6 +525,17 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): kicked_users.append(user_id) + yield self.event_creation_handler.create_and_send_nonmember_event( + room_creator_requester, + { + "type": "m.room.message", + "content": {"body": message, "msgtype": "m.text"}, + "room_id": new_room_id, + "sender": new_room_user_id, + }, + ratelimit=False, + ) + aliases_for_room = yield self.store.get_aliases_for_room(room_id) yield self.store.update_aliases_for_room( @@ -763,3 +785,4 @@ def register_servlets(hs, http_server): QuarantineMediaInRoom(hs).register(http_server) ListMediaInRoom(hs).register(http_server) UserRegisterServlet(hs).register(http_server) + VersionServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 6121c5b6df..8d56effbb8 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -22,6 +22,7 @@ from twisted.internet import defer from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.ratelimiting import Ratelimiter from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -97,6 +98,7 @@ class LoginRestServlet(ClientV1RestServlet): self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() self._well_known_builder = WellKnownBuilder(hs) + self._address_ratelimiter = Ratelimiter() def on_GET(self, request): flows = [] @@ -129,6 +131,13 @@ class LoginRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): + self._address_ratelimiter.ratelimit( + request.getClientIP(), time_now_s=self.hs.clock.time(), + rate_hz=self.hs.config.rc_login_address.per_second, + burst_count=self.hs.config.rc_login_address.burst_count, + update=True, + ) + login_submission = parse_json_object_from_request(request) try: if self.jwt_enabled and (login_submission["type"] == @@ -285,6 +294,7 @@ class LoginRestServlet(ClientV1RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() + auth_handler = self.auth_handler registered_user_id = yield auth_handler.check_user_exists(user_id) if registered_user_id: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 94cbba4303..6d235262c8 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -25,7 +25,12 @@ from twisted.internet import defer import synapse import synapse.types from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError +from synapse.api.errors import ( + Codes, + LimitExceededError, + SynapseError, + UnrecognizedRequestError, +) from synapse.config.server import is_threepid_reserved from synapse.http.servlet import ( RestServlet, @@ -191,18 +196,36 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() + self.ratelimiter = hs.get_registration_ratelimiter() + self.clock = hs.get_clock() @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): body = parse_json_object_from_request(request) + client_addr = request.getClientIP() + + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + client_addr, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + update=False, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + kind = b"user" if b"kind" in request.args: kind = request.args[b"kind"][0] if kind == b"guest": - ret = yield self._do_guest_registration(body) + ret = yield self._do_guest_registration(body, address=client_addr) defer.returnValue(ret) return elif kind != b"user": @@ -411,6 +434,7 @@ class RegisterRestServlet(RestServlet): guest_access_token=guest_access_token, generate_token=False, threepid=threepid, + address=client_addr, ) # Necessary due to auth checks prior to the threepid being # written to the db @@ -522,12 +546,13 @@ class RegisterRestServlet(RestServlet): defer.returnValue(result) @defer.inlineCallbacks - def _do_guest_registration(self, params): + def _do_guest_registration(self, params, address=None): if not self.hs.config.allow_guest_access: raise SynapseError(403, "Guest access is disabled") user_id, _ = yield self.registration_handler.register( generate_token=False, - make_guest=True + make_guest=True, + address=address, ) # we don't allow guests to specify their own device_id, because diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index d16a30acd8..953d89bd82 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -99,10 +100,29 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - if is_ascii(upload_name): - disposition = "inline; filename=%s" % (_quote(upload_name),) + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape charater. Note that %-encoding is *not* permitted. + # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = 'inline; filename=%s' % (upload_name, ) else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name), ) request.setHeader(b"Content-Disposition", disposition.encode('ascii')) @@ -115,6 +135,35 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Length", b"%d" % (file_size,)) +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = set(( + "(", ")", "<", ">", "@", ",", ";", ":", "\\", '"', + "/", "[", "]", "?", "=", "{", "}", +)) + + +def _can_encode_filename_as_token(x): + for c in x: + # from RFC2616: + # + # token = 1*<any CHAR except CTLs or separators> + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = <any US-ASCII character (octets 0 - 127)> + # + # CTL = <any US-ASCII control character + # (octets 0 - 31) and DEL (127)> + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + @defer.inlineCallbacks def respond_with_responder(request, responder, media_type, file_size, upload_name=None): """Responds to the request with given responder. If responder is None then @@ -213,8 +262,7 @@ def get_filename_from_headers(headers): Content-Disposition HTTP header. Args: - headers (twisted.web.http_headers.Headers): The HTTP - request headers. + headers (dict[bytes, list[bytes]]): The HTTP request headers. Returns: A Unicode string of the filename, or None. @@ -225,23 +273,12 @@ def get_filename_from_headers(headers): if not content_disposition[0]: return - # dict of unicode: bytes, corresponding to the key value sections of the - # Content-Disposition header. - params = {} - parts = content_disposition[0].split(b";") - for i in parts: - # Split into key-value pairs, if able - # We don't care about things like `inline`, so throw it out - if b"=" not in i: - continue - - key, value = i.strip().split(b"=") - params[key.decode('ascii')] = value + _, params = _parse_header(content_disposition[0]) upload_name = None # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) + upload_name_utf8 = params.get(b"filename*", None) if upload_name_utf8: if upload_name_utf8.lower().startswith(b"utf-8''"): upload_name_utf8 = upload_name_utf8[7:] @@ -267,12 +304,68 @@ def get_filename_from_headers(headers): # If there isn't check for an ascii name. if not upload_name: - upload_name_ascii = params.get("filename", None) + upload_name_ascii = params.get(b"filename", None) if upload_name_ascii and is_ascii(upload_name_ascii): - # Make sure there's no %-quoted bytes. If there is, reject it as - # non-valid ASCII. - if b"%" not in upload_name_ascii: - upload_name = upload_name_ascii.decode('ascii') + upload_name = upload_name_ascii.decode('ascii') # This may be None here, indicating we did not find a matching name. return upload_name + + +def _parse_header(line): + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + line (bytes): header to be parsed + + Returns: + Tuple[bytes, dict[bytes, bytes]]: + the main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b';' + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b'=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s): + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s (bytes): header to be parsed + + Returns: + Iterable[bytes]: the split input + """ + while s[:1] == b';': + s = s[1:] + + # look for the next ; + end = s.find(b';') + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b';', end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] diff --git a/synapse/server.py b/synapse/server.py index 4d364fccce..dc8f1ccb8c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -42,7 +42,7 @@ from synapse.federation.federation_server import ( ReplicationFederationHandlerRegistry, ) from synapse.federation.send_queue import FederationRemoteSendQueue -from synapse.federation.transaction_queue import TransactionQueue +from synapse.federation.sender import FederationSender from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer from synapse.groups.groups_server import GroupsServerHandler @@ -51,7 +51,7 @@ from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler -from synapse.handlers.device import DeviceHandler +from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler @@ -185,6 +185,10 @@ class HomeServer(object): 'registration_handler', ] + REQUIRED_ON_MASTER_STARTUP = [ + "user_directory_handler", + ] + # This is overridden in derived application classes # (such as synapse.app.homeserver.SynapseHomeServer) and gives the class to be # instantiated during setup() for future return by get_datastore() @@ -206,6 +210,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -220,6 +225,15 @@ class HomeServer(object): conn.commit() logger.info("Finished setting up.") + def setup_master(self): + """ + Some handlers have side effects on instantiation (like registering + background updates). This function causes them to be fetched, and + therefore instantiated, to run those side effects. + """ + for i in self.REQUIRED_ON_MASTER_STARTUP: + getattr(self, "get_" + i)() + def get_reactor(self): """ Fetch the Twisted reactor in use by this HomeServer. @@ -251,6 +265,9 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter + def get_registration_ratelimiter(self): + return self.registration_ratelimiter + def build_federation_client(self): return FederationClient(self) @@ -307,7 +324,10 @@ class HomeServer(object): return MacaroonGenerator(self) def build_device_handler(self): - return DeviceHandler(self) + if self.config.worker_app: + return DeviceWorkerHandler(self) + else: + return DeviceHandler(self) def build_device_message_handler(self): return DeviceMessageHandler(self) @@ -414,7 +434,7 @@ class HomeServer(object): def build_federation_sender(self): if self.should_send_federation(): - return TransactionQueue(self) + return FederationSender(self) elif not self.config.worker_app: return FederationRemoteSendQueue(self) else: diff --git a/synapse/server.pyi b/synapse/server.pyi index 06cd083a74..3ba3a967c2 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -1,5 +1,6 @@ import synapse.api.auth import synapse.config.homeserver +import synapse.federation.sender import synapse.federation.transaction_queue import synapse.federation.transport.client import synapse.handlers @@ -7,9 +8,9 @@ import synapse.handlers.auth import synapse.handlers.deactivate_account import synapse.handlers.device import synapse.handlers.e2e_keys +import synapse.handlers.message import synapse.handlers.room import synapse.handlers.room_member -import synapse.handlers.message import synapse.handlers.set_password import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager @@ -62,7 +63,7 @@ class HomeServer(object): def get_set_password_handler(self) -> synapse.handlers.set_password.SetPasswordHandler: pass - def get_federation_sender(self) -> synapse.federation.transaction_queue.TransactionQueue: + def get_federation_sender(self) -> synapse.federation.sender.FederationSender: pass def get_federation_transport_client(self) -> synapse.federation.transport.client.TransportLayerClient: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a0333d5309..7e3903859b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -767,18 +767,25 @@ class SQLBaseStore(object): """ allvalues = {} allvalues.update(keyvalues) - allvalues.update(values) allvalues.update(insertion_values) + if not values: + latter = "NOTHING" + else: + allvalues.update(values) + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) + ) + sql = ( "INSERT INTO %s (%s) VALUES (%s) " - "ON CONFLICT (%s) DO UPDATE SET %s" + "ON CONFLICT (%s) DO %s" ) % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), - ", ".join(k + "=EXCLUDED." + k for k in values), + latter ) txn.execute(sql, list(allvalues.values())) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 60cdc884e6..a2f8c23a65 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.avg_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: # Use the exponential moving average so that we can adapt to @@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.total_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: return float(self.total_item_count) / float(self.total_duration_ms) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index e06b0bc56d..e6a42a53bb 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -19,14 +19,174 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import SQLBaseStore +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache -from .background_updates import BackgroundUpdateStore - logger = logging.getLogger(__name__) -class DeviceInboxStore(BackgroundUpdateStore): +class DeviceInboxWorkerStore(SQLBaseStore): + def get_to_device_stream_token(self): + return self._device_inbox_id_gen.get_current_token() + + def get_new_messages_for_device( + self, user_id, device_id, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + current_stream_id(int): The current position of the to device + message stream. + Returns: + Deferred ([dict], int): List of messages for the device and where + in the stream the messages got to. + """ + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_stream_id + ) + if not has_changed: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_device_txn(txn): + sql = ( + "SELECT stream_id, message_json FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + user_id, device_id, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_messages_for_device", get_new_messages_for_device_txn, + ) + + @defer.inlineCallbacks + def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves to the number of messages deleted. + """ + # If we have cached the last stream id we've deleted up to, we can + # check if there is likely to be anything that needs deleting + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), None + ) + if last_deleted_stream_id: + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_deleted_stream_id + ) + if not has_changed: + defer.returnValue(0) + + def delete_messages_for_device_txn(txn): + sql = ( + "DELETE FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (user_id, device_id, up_to_stream_id)) + return txn.rowcount + + count = yield self.runInteraction( + "delete_messages_for_device", delete_messages_for_device_txn + ) + + # Update the cache, ensuring that we only ever increase the value + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), 0 + ) + self._last_device_delete_cache[(user_id, device_id)] = max( + last_deleted_stream_id, up_to_stream_id + ) + + defer.returnValue(count) + + def get_new_device_msgs_for_remote( + self, destination, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + destination(str): The name of the remote server. + last_stream_id(int|long): The last position of the device message stream + that the server sent up to. + current_stream_id(int|long): The current position of the device + message stream. + Returns: + Deferred ([dict], int|long): List of messages for the device and where + in the stream the messages got to. + """ + + has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( + destination, last_stream_id + ) + if not has_changed or last_stream_id == current_stream_id: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_remote_destination_txn(txn): + sql = ( + "SELECT stream_id, messages_json FROM device_federation_outbox" + " WHERE destination = ?" + " AND ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + destination, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_device_msgs_for_remote", + get_new_messages_for_remote_destination_txn, + ) + + def delete_device_msgs_for_remote(self, destination, up_to_stream_id): + """Used to delete messages when the remote destination acknowledges + their receipt. + + Args: + destination(str): The destination server_name + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves when the messages have been deleted. + """ + def delete_messages_for_remote_destination_txn(txn): + sql = ( + "DELETE FROM device_federation_outbox" + " WHERE destination = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (destination, up_to_stream_id)) + + return self.runInteraction( + "delete_device_msgs_for_remote", + delete_messages_for_remote_destination_txn + ) + + +class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): @@ -220,93 +380,6 @@ class DeviceInboxStore(BackgroundUpdateStore): txn.executemany(sql, rows) - def get_new_messages_for_device( - self, user_id, device_id, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - current_stream_id(int): The current position of the to device - message stream. - Returns: - Deferred ([dict], int): List of messages for the device and where - in the stream the messages got to. - """ - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_stream_id - ) - if not has_changed: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_device_txn(txn): - sql = ( - "SELECT stream_id, message_json FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" - ) - txn.execute(sql, ( - user_id, device_id, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_messages_for_device", get_new_messages_for_device_txn, - ) - - @defer.inlineCallbacks - def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves to the number of messages deleted. - """ - # If we have cached the last stream id we've deleted up to, we can - # check if there is likely to be anything that needs deleting - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), None - ) - if last_deleted_stream_id: - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_deleted_stream_id - ) - if not has_changed: - defer.returnValue(0) - - def delete_messages_for_device_txn(txn): - sql = ( - "DELETE FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND stream_id <= ?" - ) - txn.execute(sql, (user_id, device_id, up_to_stream_id)) - return txn.rowcount - - count = yield self.runInteraction( - "delete_messages_for_device", delete_messages_for_device_txn - ) - - # Update the cache, ensuring that we only ever increase the value - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), 0 - ) - self._last_device_delete_cache[(user_id, device_id)] = max( - last_deleted_stream_id, up_to_stream_id - ) - - defer.returnValue(count) - def get_all_new_device_messages(self, last_pos, current_pos, limit): """ Args: @@ -351,77 +424,6 @@ class DeviceInboxStore(BackgroundUpdateStore): "get_all_new_device_messages", get_all_new_device_messages_txn ) - def get_to_device_stream_token(self): - return self._device_inbox_id_gen.get_current_token() - - def get_new_device_msgs_for_remote( - self, destination, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - destination(str): The name of the remote server. - last_stream_id(int|long): The last position of the device message stream - that the server sent up to. - current_stream_id(int|long): The current position of the device - message stream. - Returns: - Deferred ([dict], int|long): List of messages for the device and where - in the stream the messages got to. - """ - - has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( - destination, last_stream_id - ) - if not has_changed or last_stream_id == current_stream_id: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_remote_destination_txn(txn): - sql = ( - "SELECT stream_id, messages_json FROM device_federation_outbox" - " WHERE destination = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" - ) - txn.execute(sql, ( - destination, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_device_msgs_for_remote", - get_new_messages_for_remote_destination_txn, - ) - - def delete_device_msgs_for_remote(self, destination, up_to_stream_id): - """Used to delete messages when the remote destination acknowledges - their receipt. - - Args: - destination(str): The destination server_name - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves when the messages have been deleted. - """ - def delete_messages_for_remote_destination_txn(txn): - sql = ( - "DELETE FROM device_federation_outbox" - " WHERE destination = ?" - " AND stream_id <= ?" - ) - txn.execute(sql, (destination, up_to_stream_id)) - - return self.runInteraction( - "delete_device_msgs_for_remote", - delete_messages_for_remote_destination_txn - ) - @defer.inlineCallbacks def _background_drop_index_device_inbox(self, progress, batch_size): def reindex_txn(conn): diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index ecdab34e7d..e716dc1437 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -22,11 +22,10 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList -from ._base import Cache, db_to_json - logger = logging.getLogger(__name__) DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( @@ -34,7 +33,343 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( ) -class DeviceStore(BackgroundUpdateStore): +class DeviceWorkerStore(SQLBaseStore): + def get_device(self, user_id, device_id): + """Retrieve a device. + + Args: + user_id (str): The ID of the user which owns the device + device_id (str): The ID of the device to retrieve + Returns: + defer.Deferred for a dict containing the device information + Raises: + StoreError: if the device is not found + """ + return self._simple_select_one( + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + retcols=("user_id", "device_id", "display_name"), + desc="get_device", + ) + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """Retrieve all of a user's registered devices. + + Args: + user_id (str): + Returns: + defer.Deferred: resolves to a dict from device_id to a dict + containing "device_id", "user_id" and "display_name" for each + device. + """ + devices = yield self._simple_select_list( + table="devices", + keyvalues={"user_id": user_id}, + retcols=("user_id", "device_id", "display_name"), + desc="get_devices_by_user" + ) + + defer.returnValue({d["device_id"]: d for d in devices}) + + def get_devices_by_remote(self, destination, from_stream_id): + """Get stream of updates to send to remote servers + + Returns: + (int, list[dict]): current stream id and list of updates + """ + now_stream_id = self._device_list_id_gen.get_current_token() + + has_changed = self._device_list_federation_stream_cache.has_entity_changed( + destination, int(from_stream_id) + ) + if not has_changed: + return (now_stream_id, []) + + return self.runInteraction( + "get_devices_by_remote", self._get_devices_by_remote_txn, + destination, from_stream_id, now_stream_id, + ) + + def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, + now_stream_id): + sql = """ + SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes + WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? + GROUP BY user_id, device_id + LIMIT 20 + """ + txn.execute( + sql, (destination, from_stream_id, now_stream_id, False) + ) + + # maps (user_id, device_id) -> stream_id + query_map = {(r[0], r[1]): r[2] for r in txn} + if not query_map: + return (now_stream_id, []) + + if len(query_map) >= 20: + now_stream_id = max(stream_id for stream_id in itervalues(query_map)) + + devices = self._get_e2e_device_keys_txn( + txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True + ) + + prev_sent_id_sql = """ + SELECT coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? AND stream_id <= ? + """ + + results = [] + for user_id, user_devices in iteritems(devices): + # The prev_id for the first row is always the last row before + # `from_stream_id` + txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) + rows = txn.fetchall() + prev_id = rows[0][0] + for device_id, device in iteritems(user_devices): + stream_id = query_map[(user_id, device_id)] + result = { + "user_id": user_id, + "device_id": device_id, + "prev_id": [prev_id] if prev_id else [], + "stream_id": stream_id, + } + + prev_id = stream_id + + if device is not None: + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + else: + result["deleted"] = True + + results.append(result) + + return (now_stream_id, results) + + def mark_as_sent_devices_by_remote(self, destination, stream_id): + """Mark that updates have successfully been sent to the destination. + """ + return self.runInteraction( + "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, + destination, stream_id, + ) + + def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): + # We update the device_lists_outbound_last_success with the successfully + # poked users. We do the join to see which users need to be inserted and + # which updated. + sql = """ + SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) + FROM device_lists_outbound_pokes as o + LEFT JOIN device_lists_outbound_last_success as s + USING (destination, user_id) + WHERE destination = ? AND o.stream_id <= ? + GROUP BY user_id + """ + txn.execute(sql, (destination, stream_id,)) + rows = txn.fetchall() + + sql = """ + UPDATE device_lists_outbound_last_success + SET stream_id = ? + WHERE destination = ? AND user_id = ? + """ + txn.executemany( + sql, ((row[1], destination, row[0],) for row in rows if row[2]) + ) + + sql = """ + INSERT INTO device_lists_outbound_last_success + (destination, user_id, stream_id) VALUES (?, ?, ?) + """ + txn.executemany( + sql, ((destination, row[0], row[1],) for row in rows if not row[2]) + ) + + # Delete all sent outbound pokes + sql = """ + DELETE FROM device_lists_outbound_pokes + WHERE destination = ? AND stream_id <= ? + """ + txn.execute(sql, (destination, stream_id,)) + + def get_device_stream_token(self): + return self._device_list_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_user_devices_from_cache(self, query_list): + """Get the devices (and keys if any) for remote users from the cache. + + Args: + query_list(list): List of (user_id, device_ids), if device_ids is + falsey then return all device ids for that user. + + Returns: + (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is + a set of user_ids and results_map is a mapping of + user_id -> device_id -> device_info + """ + user_ids = set(user_id for user_id, _ in query_list) + user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) + user_ids_in_cache = set( + user_id for user_id, stream_id in user_map.items() if stream_id + ) + user_ids_not_in_cache = user_ids - user_ids_in_cache + + results = {} + for user_id, device_id in query_list: + if user_id not in user_ids_in_cache: + continue + + if device_id: + device = yield self._get_cached_user_device(user_id, device_id) + results.setdefault(user_id, {})[device_id] = device + else: + results[user_id] = yield self._get_cached_devices_for_user(user_id) + + defer.returnValue((user_ids_not_in_cache, results)) + + @cachedInlineCallbacks(num_args=2, tree=True) + def _get_cached_user_device(self, user_id, device_id): + content = yield self._simple_select_one_onecol( + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + retcol="content", + desc="_get_cached_user_device", + ) + defer.returnValue(db_to_json(content)) + + @cachedInlineCallbacks() + def _get_cached_devices_for_user(self, user_id): + devices = yield self._simple_select_list( + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + }, + retcols=("device_id", "content"), + desc="_get_cached_devices_for_user", + ) + defer.returnValue({ + device["device_id"]: db_to_json(device["content"]) + for device in devices + }) + + def get_devices_with_keys_by_user(self, user_id): + """Get all devices (with any device keys) for a user + + Returns: + (stream_id, devices) + """ + return self.runInteraction( + "get_devices_with_keys_by_user", + self._get_devices_with_keys_by_user_txn, user_id, + ) + + def _get_devices_with_keys_by_user_txn(self, txn, user_id): + now_stream_id = self._device_list_id_gen.get_current_token() + + devices = self._get_e2e_device_keys_txn( + txn, [(user_id, None)], include_all_devices=True + ) + + if devices: + user_devices = devices[user_id] + results = [] + for device_id, device in iteritems(user_devices): + result = { + "device_id": device_id, + } + + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + + results.append(result) + + return now_stream_id, results + + return now_stream_id, [] + + @defer.inlineCallbacks + def get_user_whose_devices_changed(self, from_key): + """Get set of users whose devices have changed since `from_key`. + """ + from_key = int(from_key) + changed = self._device_list_stream_cache.get_all_entities_changed(from_key) + if changed is not None: + defer.returnValue(set(changed)) + + sql = """ + SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? + """ + rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) + defer.returnValue(set(row[0] for row in rows)) + + def get_all_device_list_changes_for_remotes(self, from_key, to_key): + """Return a list of `(stream_id, user_id, destination)` which is the + combined list of changes to devices, and which destinations need to be + poked. `destination` may be None if no destinations need to be poked. + """ + # We do a group by here as there can be a large number of duplicate + # entries, since we throw away device IDs. + sql = """ + SELECT MAX(stream_id) AS stream_id, user_id, destination + FROM device_lists_stream + LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) + WHERE ? < stream_id AND stream_id <= ? + GROUP BY user_id, destination + """ + return self._execute( + "get_all_device_list_changes_for_remotes", None, + sql, from_key, to_key + ) + + @cached(max_entries=10000) + def get_device_list_last_stream_id_for_remote(self, user_id): + """Get the last stream_id we got for a user. May be None if we haven't + got any information for them. + """ + return self._simple_select_one_onecol( + table="device_lists_remote_extremeties", + keyvalues={"user_id": user_id}, + retcol="stream_id", + desc="get_device_list_last_stream_id_for_remote", + allow_none=True, + ) + + @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", + list_name="user_ids", inlineCallbacks=True) + def get_device_list_last_stream_id_for_remotes(self, user_ids): + rows = yield self._simple_select_many_batch( + table="device_lists_remote_extremeties", + column="user_id", + iterable=user_ids, + retcols=("user_id", "stream_id",), + desc="get_device_list_last_stream_id_for_remotes", + ) + + results = {user_id: None for user_id in user_ids} + results.update({ + row["user_id"]: row["stream_id"] for row in rows + }) + + defer.returnValue(results) + + +class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): def __init__(self, db_conn, hs): super(DeviceStore, self).__init__(db_conn, hs) @@ -121,24 +456,6 @@ class DeviceStore(BackgroundUpdateStore): initial_device_display_name, e) raise StoreError(500, "Problem storing device.") - def get_device(self, user_id, device_id): - """Retrieve a device. - - Args: - user_id (str): The ID of the user which owns the device - device_id (str): The ID of the device to retrieve - Returns: - defer.Deferred for a dict containing the device information - Raises: - StoreError: if the device is not found - """ - return self._simple_select_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, - retcols=("user_id", "device_id", "display_name"), - desc="get_device", - ) - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """Delete a device. @@ -203,57 +520,6 @@ class DeviceStore(BackgroundUpdateStore): ) @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """Retrieve all of a user's registered devices. - - Args: - user_id (str): - Returns: - defer.Deferred: resolves to a dict from device_id to a dict - containing "device_id", "user_id" and "display_name" for each - device. - """ - devices = yield self._simple_select_list( - table="devices", - keyvalues={"user_id": user_id}, - retcols=("user_id", "device_id", "display_name"), - desc="get_devices_by_user" - ) - - defer.returnValue({d["device_id"]: d for d in devices}) - - @cached(max_entries=10000) - def get_device_list_last_stream_id_for_remote(self, user_id): - """Get the last stream_id we got for a user. May be None if we haven't - got any information for them. - """ - return self._simple_select_one_onecol( - table="device_lists_remote_extremeties", - keyvalues={"user_id": user_id}, - retcol="stream_id", - desc="get_device_list_remote_extremity", - allow_none=True, - ) - - @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", - list_name="user_ids", inlineCallbacks=True) - def get_device_list_last_stream_id_for_remotes(self, user_ids): - rows = yield self._simple_select_many_batch( - table="device_lists_remote_extremeties", - column="user_id", - iterable=user_ids, - retcols=("user_id", "stream_id",), - desc="get_user_devices_from_cache", - ) - - results = {user_id: None for user_id in user_ids} - results.update({ - row["user_id"]: row["stream_id"] for row in rows - }) - - defer.returnValue(results) - - @defer.inlineCallbacks def mark_remote_user_device_list_as_unsubscribed(self, user_id): """Mark that we no longer track device lists for remote user. """ @@ -405,268 +671,6 @@ class DeviceStore(BackgroundUpdateStore): lock=False, ) - def get_devices_by_remote(self, destination, from_stream_id): - """Get stream of updates to send to remote servers - - Returns: - (int, list[dict]): current stream id and list of updates - """ - now_stream_id = self._device_list_id_gen.get_current_token() - - has_changed = self._device_list_federation_stream_cache.has_entity_changed( - destination, int(from_stream_id) - ) - if not has_changed: - return (now_stream_id, []) - - return self.runInteraction( - "get_devices_by_remote", self._get_devices_by_remote_txn, - destination, from_stream_id, now_stream_id, - ) - - def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, - now_stream_id): - sql = """ - SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes - WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? - GROUP BY user_id, device_id - LIMIT 20 - """ - txn.execute( - sql, (destination, from_stream_id, now_stream_id, False) - ) - - # maps (user_id, device_id) -> stream_id - query_map = {(r[0], r[1]): r[2] for r in txn} - if not query_map: - return (now_stream_id, []) - - if len(query_map) >= 20: - now_stream_id = max(stream_id for stream_id in itervalues(query_map)) - - devices = self._get_e2e_device_keys_txn( - txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True - ) - - prev_sent_id_sql = """ - SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_last_success - WHERE destination = ? AND user_id = ? AND stream_id <= ? - """ - - results = [] - for user_id, user_devices in iteritems(devices): - # The prev_id for the first row is always the last row before - # `from_stream_id` - txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) - rows = txn.fetchall() - prev_id = rows[0][0] - for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] - result = { - "user_id": user_id, - "device_id": device_id, - "prev_id": [prev_id] if prev_id else [], - "stream_id": stream_id, - } - - prev_id = stream_id - - if device is not None: - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name - else: - result["deleted"] = True - - results.append(result) - - return (now_stream_id, results) - - @defer.inlineCallbacks - def get_user_devices_from_cache(self, query_list): - """Get the devices (and keys if any) for remote users from the cache. - - Args: - query_list(list): List of (user_id, device_ids), if device_ids is - falsey then return all device ids for that user. - - Returns: - (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is - a set of user_ids and results_map is a mapping of - user_id -> device_id -> device_info - """ - user_ids = set(user_id for user_id, _ in query_list) - user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) - user_ids_in_cache = set( - user_id for user_id, stream_id in user_map.items() if stream_id - ) - user_ids_not_in_cache = user_ids - user_ids_in_cache - - results = {} - for user_id, device_id in query_list: - if user_id not in user_ids_in_cache: - continue - - if device_id: - device = yield self._get_cached_user_device(user_id, device_id) - results.setdefault(user_id, {})[device_id] = device - else: - results[user_id] = yield self._get_cached_devices_for_user(user_id) - - defer.returnValue((user_ids_not_in_cache, results)) - - @cachedInlineCallbacks(num_args=2, tree=True) - def _get_cached_user_device(self, user_id, device_id): - content = yield self._simple_select_one_onecol( - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - retcol="content", - desc="_get_cached_user_device", - ) - defer.returnValue(db_to_json(content)) - - @cachedInlineCallbacks() - def _get_cached_devices_for_user(self, user_id): - devices = yield self._simple_select_list( - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - }, - retcols=("device_id", "content"), - desc="_get_cached_devices_for_user", - ) - defer.returnValue({ - device["device_id"]: db_to_json(device["content"]) - for device in devices - }) - - def get_devices_with_keys_by_user(self, user_id): - """Get all devices (with any device keys) for a user - - Returns: - (stream_id, devices) - """ - return self.runInteraction( - "get_devices_with_keys_by_user", - self._get_devices_with_keys_by_user_txn, user_id, - ) - - def _get_devices_with_keys_by_user_txn(self, txn, user_id): - now_stream_id = self._device_list_id_gen.get_current_token() - - devices = self._get_e2e_device_keys_txn( - txn, [(user_id, None)], include_all_devices=True - ) - - if devices: - user_devices = devices[user_id] - results = [] - for device_id, device in iteritems(user_devices): - result = { - "device_id": device_id, - } - - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name - - results.append(result) - - return now_stream_id, results - - return now_stream_id, [] - - def mark_as_sent_devices_by_remote(self, destination, stream_id): - """Mark that updates have successfully been sent to the destination. - """ - return self.runInteraction( - "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, - destination, stream_id, - ) - - def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): - # We update the device_lists_outbound_last_success with the successfully - # poked users. We do the join to see which users need to be inserted and - # which updated. - sql = """ - SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) - FROM device_lists_outbound_pokes as o - LEFT JOIN device_lists_outbound_last_success as s - USING (destination, user_id) - WHERE destination = ? AND o.stream_id <= ? - GROUP BY user_id - """ - txn.execute(sql, (destination, stream_id,)) - rows = txn.fetchall() - - sql = """ - UPDATE device_lists_outbound_last_success - SET stream_id = ? - WHERE destination = ? AND user_id = ? - """ - txn.executemany( - sql, ((row[1], destination, row[0],) for row in rows if row[2]) - ) - - sql = """ - INSERT INTO device_lists_outbound_last_success - (destination, user_id, stream_id) VALUES (?, ?, ?) - """ - txn.executemany( - sql, ((destination, row[0], row[1],) for row in rows if not row[2]) - ) - - # Delete all sent outbound pokes - sql = """ - DELETE FROM device_lists_outbound_pokes - WHERE destination = ? AND stream_id <= ? - """ - txn.execute(sql, (destination, stream_id,)) - - @defer.inlineCallbacks - def get_user_whose_devices_changed(self, from_key): - """Get set of users whose devices have changed since `from_key`. - """ - from_key = int(from_key) - changed = self._device_list_stream_cache.get_all_entities_changed(from_key) - if changed is not None: - defer.returnValue(set(changed)) - - sql = """ - SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? - """ - rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) - defer.returnValue(set(row[0] for row in rows)) - - def get_all_device_list_changes_for_remotes(self, from_key, to_key): - """Return a list of `(stream_id, user_id, destination)` which is the - combined list of changes to devices, and which destinations need to be - poked. `destination` may be None if no destinations need to be poked. - """ - # We do a group by here as there can be a large number of duplicate - # entries, since we throw away device IDs. - sql = """ - SELECT MAX(stream_id) AS stream_id, user_id, destination - FROM device_lists_stream - LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) - WHERE ? < stream_id AND stream_id <= ? - GROUP BY user_id, destination - """ - return self._execute( - "get_all_device_list_changes_for_remotes", None, - sql, from_key, to_key - ) - @defer.inlineCallbacks def add_device_change_to_streams(self, user_id, device_ids, hosts): """Persist that a user's devices have been updated, and which hosts @@ -732,9 +736,6 @@ class DeviceStore(BackgroundUpdateStore): ] ) - def get_device_stream_token(self): - return self._device_list_id_gen.get_current_token() - def _prune_old_outbound_device_pokes(self): """Delete old entries out of the device_lists_outbound_pokes to ensure that we don't fill up due to dead servers. We keep one entry per diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2a0f6cfca9..e381e472a2 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -23,49 +23,7 @@ from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore, db_to_json -class EndToEndKeyStore(SQLBaseStore): - def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): - """Stores device keys for a device. Returns whether there was a change - or the keys were already in the database. - """ - def _set_e2e_device_keys_txn(txn): - old_key_json = self._simple_select_one_onecol_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - retcol="key_json", - allow_none=True, - ) - - # In py3 we need old_key_json to match new_key_json type. The DB - # returns unicode while encode_canonical_json returns bytes. - new_key_json = encode_canonical_json(device_keys).decode("utf-8") - - if old_key_json == new_key_json: - return False - - self._simple_upsert_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "ts_added_ms": time_now, - "key_json": new_key_json, - } - ) - - return True - - return self.runInteraction( - "set_e2e_device_keys", _set_e2e_device_keys_txn - ) - +class EndToEndKeyWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, @@ -238,6 +196,50 @@ class EndToEndKeyStore(SQLBaseStore): "count_e2e_one_time_keys", _count_e2e_one_time_keys ) + +class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): + def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): + """Stores device keys for a device. Returns whether there was a change + or the keys were already in the database. + """ + def _set_e2e_device_keys_txn(txn): + old_key_json = self._simple_select_one_onecol_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + retcol="key_json", + allow_none=True, + ) + + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes. + new_key_json = encode_canonical_json(device_keys).decode("utf-8") + + if old_key_json == new_key_json: + return False + + self._simple_upsert_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "ts_added_ms": time_now, + "key_json": new_key_json, + } + ) + + return True + + return self.runInteraction( + "set_e2e_device_keys", _set_e2e_device_keys_txn + ) + def claim_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" def _claim_e2e_one_time_keys(txn): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 38809ed0fc..a8d90456e3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, event_results.reverse() return event_results + @defer.inlineCallbacks + def get_successor_events(self, event_ids): + """Fetch all events that have the given events as a prev event + + Args: + event_ids (iterable[str]) + + Returns: + Deferred[list[str]] + """ + rows = yield self._simple_select_many_batch( + table="event_edges", + column="prev_event_id", + iterable=event_ids, + retcols=("event_id",), + desc="get_successor_events" + ) + + defer.returnValue([ + row["event_id"] for row in rows + ]) + class EventFederationStore(EventFederationWorkerStore): """ Responsible for storing and serving up the various graphs associated diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 06db9e56e6..428300ea0a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -537,6 +537,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore new_events = [ event for event, ctx in event_contexts if not event.internal_metadata.is_outlier() and not ctx.rejected + and not event.internal_metadata.is_soft_failed() ] # start with the existing forward extremities @@ -1406,21 +1407,6 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore values=state_values, ) - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": event.event_id, - "prev_event_id": prev_id, - "room_id": event.room_id, - "is_state": True, - } - for event, _ in state_events_and_contexts - for prev_id, _ in event.prev_state - ], - ) - # Prefill the event cache self._add_to_cache(txn, events_and_contexts) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 6a5028961d..4b8438c3e9 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -186,6 +186,63 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) @defer.inlineCallbacks + def move_push_rule_from_room_to_room( + self, new_room_id, user_id, rule, + ): + """Move a single push rule from one room to another for a specific user. + + Args: + new_room_id (str): ID of the new room. + user_id (str): ID of user the push rule belongs to. + rule (Dict): A push rule. + """ + # Create new rule id + rule_id_scope = '/'.join(rule["rule_id"].split('/')[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + yield self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + # Delete push rule for the old room + yield self.delete_push_rule(user_id, rule["rule_id"]) + + @defer.inlineCallbacks + def move_push_rules_from_room_to_room_for_user( + self, old_room_id, new_room_id, user_id, + ): + """Move all of the push rules from one room to another for a specific + user. + + Args: + old_room_id (str): ID of the old room. + new_room_id (str): ID of the new room. + user_id (str): ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = yield self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room, move them to the new room, then + # delete them from the old room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any((c.get("key") == "room_id" and + c.get("pattern") == old_room_id) for c in conditions): + self.move_push_rule_from_room_to_room( + new_room_id, user_id, rule, + ) + + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group if not state_group: diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0ac665e967..0fd1ccc40a 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -346,15 +346,23 @@ class ReceiptsStore(ReceiptsWorkerStore): def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): + """Inserts a read-receipt into the database if it's newer than the current RR + + Returns: int|None + None if the RR is older than the current RR + otherwise, the rx timestamp of the event that the RR corresponds to + (or 0 if the event is unknown) + """ res = self._simple_select_one_txn( txn, table="events", - retcols=["topological_ordering", "stream_ordering"], + retcols=["stream_ordering", "received_ts"], keyvalues={"event_id": event_id}, allow_none=True ) stream_ordering = int(res["stream_ordering"]) if res else None + rx_ts = res["received_ts"] if res else 0 # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts @@ -373,7 +381,7 @@ class ReceiptsStore(ReceiptsWorkerStore): "one for later event %s", event_id, eid, ) - return False + return None txn.call_after( self.get_receipts_for_room.invalidate, (room_id, receipt_type) @@ -429,7 +437,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_ordering=stream_ordering, ) - return True + return rx_ts @defer.inlineCallbacks def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data): @@ -466,7 +474,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id_manager = self._receipts_id_gen.get_next() with stream_id_manager as stream_id: - have_persisted = yield self.runInteraction( + event_ts = yield self.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, @@ -474,8 +482,14 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id=stream_id, ) - if not have_persisted: - defer.returnValue(None) + if event_ts is None: + defer.returnValue(None) + + now = self._clock.time_msec() + logger.debug( + "RR for event %s in %s (%i ms old)", + linearized_event_id, room_id, now - event_ts, + ) yield self.insert_graph_receipt( room_id, receipt_type, user_id, event_ids, data diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9b9572890b..9b6c28892c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -295,6 +295,39 @@ class RegistrationWorkerStore(SQLBaseStore): return ret['user_id'] return None + @defer.inlineCallbacks + def user_add_threepid(self, user_id, medium, address, validated_at, added_at): + yield self._simple_upsert("user_threepids", { + "medium": medium, + "address": address, + }, { + "user_id": user_id, + "validated_at": validated_at, + "added_at": added_at, + }) + + @defer.inlineCallbacks + def user_get_threepids(self, user_id): + ret = yield self._simple_select_list( + "user_threepids", { + "user_id": user_id + }, + ['medium', 'address', 'validated_at', 'added_at'], + 'user_get_threepids' + ) + defer.returnValue(ret) + + def user_delete_threepid(self, user_id, medium, address): + return self._simple_delete( + "user_threepids", + keyvalues={ + "user_id": user_id, + "medium": medium, + "address": address, + }, + desc="user_delete_threepids", + ) + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -633,39 +666,6 @@ class RegistrationStore(RegistrationWorkerStore, defer.returnValue(res if res else False) @defer.inlineCallbacks - def user_add_threepid(self, user_id, medium, address, validated_at, added_at): - yield self._simple_upsert("user_threepids", { - "medium": medium, - "address": address, - }, { - "user_id": user_id, - "validated_at": validated_at, - "added_at": added_at, - }) - - @defer.inlineCallbacks - def user_get_threepids(self, user_id): - ret = yield self._simple_select_list( - "user_threepids", { - "user_id": user_id - }, - ['medium', 'address', 'validated_at', 'added_at'], - 'user_get_threepids' - ) - defer.returnValue(ret) - - def user_delete_threepid(self, user_id, medium, address): - return self._simple_delete( - "user_threepids", - keyvalues={ - "user_id": user_id, - "medium": medium, - "address": address, - }, - desc="user_delete_threepids", - ) - - @defer.inlineCallbacks def save_or_get_3pid_guest_access_token( self, medium, address, access_token, inviter_user_id ): diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/schema/delta/53/user_dir_populate.sql new file mode 100644 index 0000000000..955b8fdbd6 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_dir_populate.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql new file mode 100644 index 0000000000..5831b1a6f8 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -0,0 +1,44 @@ +/* Copyright 2017 Vector Creations Ltd, 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Old disused version of the tables below. +DROP TABLE IF EXISTS users_who_share_rooms; + +-- Tables keeping track of what users share rooms. This is a map of local users +-- to local or remote users, per room. Remote users cannot be in the user_id +-- column, only the other_user_id column. There are two tables, one for public +-- rooms and those for private rooms. +CREATE TABLE IF NOT EXISTS users_who_share_public_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS users_who_share_private_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_who_share_public_rooms_u_idx ON users_who_share_public_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_public_rooms_r_idx ON users_who_share_public_rooms(room_id); +CREATE INDEX users_who_share_public_rooms_o_idx ON users_who_share_public_rooms(other_user_id); + +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); + +-- Make sure that we populate the tables initially by resetting the stream ID +UPDATE user_directory_stream_pos SET stream_id = NULL; diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql new file mode 100644 index 0000000000..f7827ca6d2 --- /dev/null +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We don't need the old version of this table. +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Old version of users_in_public_rooms +DROP TABLE IF EXISTS users_who_share_public_rooms; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); diff --git a/synapse/storage/schema/full_schemas/11/event_edges.sql b/synapse/storage/schema/full_schemas/11/event_edges.sql index 52eec88357..bccd1c6f74 100644 --- a/synapse/storage/schema/full_schemas/11/event_edges.sql +++ b/synapse/storage/schema/full_schemas/11/event_edges.sql @@ -37,6 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, + -- We no longer insert prev_state into this table, so all new rows will have + -- is_state as false. is_state BOOL NOT NULL, UNIQUE (event_id, prev_event_id, room_id, is_state) ); diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d6cfdba519..580fafeb3a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -191,6 +191,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): + """Get new room events in stream ordering since `from_key`. + + Args: + room_id (str) + from_key (str): Token from which no events are returned before + to_key (str): Token from which no events are returned after. (This + is typically the current stream token) + limit (int): Maximum number of events to return + order (str): Either "DESC" or "ASC". Determines which events are + returned when the result is limited. If "DESC" then the most + recent `limit` events are returned, otherwise returns the + oldest `limit` events. + + Returns: + Deferred[dict[str,tuple[list[FrozenEvent], str]]] + A map from room id to a tuple containing: + - list of recent events in the room + - stream ordering key for the start of the chunk of events returned. + """ from_id = RoomStreamToken.parse_stream_token(from_key).stream room_ids = yield self._events_stream_cache.get_entities_changed( diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index fea866c043..4ee653210f 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -16,22 +16,288 @@ import logging import re -from six import iteritems - from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks - -from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +TEMP_TABLE = "_temp_populate_user_directory" + + +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + self.server_name = hs.hostname + + self.register_background_update_handler( + "populate_user_directory_createtables", + self._populate_user_directory_createtables, + ) + self.register_background_update_handler( + "populate_user_directory_process_rooms", + self._populate_user_directory_process_rooms, + ) + self.register_background_update_handler( + "populate_user_directory_process_users", + self._populate_user_directory_process_users, + ) + self.register_background_update_handler( + "populate_user_directory_cleanup", self._populate_user_directory_cleanup + ) + + @defer.inlineCallbacks + def _populate_user_directory_createtables(self, progress, batch_size): + + # Get all the rooms that we want to process. + def _make_staging_area(txn): + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" + ) + txn.execute(sql) + + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_position(position TEXT NOT NULL)" + ) + txn.execute(sql) + + # Get rooms we want to process from the database + sql = """ + SELECT room_id, count(*) FROM current_state_events + GROUP BY room_id + """ + txn.execute(sql) + rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] + self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + del rooms + + # If search all users is on, get all the users we want to add. + if self.hs.config.user_directory_search_all_users: + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) + + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] + + self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + + new_pos = yield self.get_max_stream_id_in_current_state_deltas() + yield self.runInteraction( + "populate_user_directory_temp_build", _make_staging_area + ) + yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos}) + + yield self._end_background_update("populate_user_directory_createtables") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_cleanup(self, progress, batch_size): + """ + Update the user directory stream position, then clean up the old tables. + """ + position = yield self._simple_select_one_onecol( + TEMP_TABLE + "_position", None, "position" + ) + yield self.update_user_directory_stream_pos(position) + + def _delete_staging_area(txn): + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") + + yield self.runInteraction( + "populate_user_directory_cleanup", _delete_staging_area + ) + + yield self._end_background_update("populate_user_directory_cleanup") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_process_rooms(self, progress, batch_size): + + state = self.hs.get_state_handler() + + # If we don't have progress filed, delete everything. + if not progress: + yield self.delete_all_from_user_dir() + + def _get_next_batch(txn): + sql = """ + SELECT room_id FROM %s + ORDER BY events DESC + LIMIT %s + """ % ( + TEMP_TABLE + "_rooms", + str(batch_size), + ) + txn.execute(sql) + rooms_to_work_on = txn.fetchall() + + if not rooms_to_work_on: + return None + + rooms_to_work_on = [x[0] for x in rooms_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") + progress["remaining"] = txn.fetchone()[0] + + return rooms_to_work_on + + rooms_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more rooms -- complete the transaction. + if not rooms_to_work_on: + yield self._end_background_update("populate_user_directory_process_rooms") + defer.returnValue(1) + + logger.info( + "Processing the next %d rooms of %d remaining" + % (len(rooms_to_work_on), progress["remaining"]) + ) + + for room_id in rooms_to_work_on: + is_in_room = yield self.is_host_joined(room_id, self.server_name) + + if is_in_room: + is_public = yield self.is_room_world_readable_or_publicly_joinable( + room_id + ) + + users_with_profile = yield state.get_current_user_in_room(room_id) + user_ids = set(users_with_profile) + + # Update each user in the user directory. + for user_id, profile in users_with_profile.items(): + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + to_insert = set() + + if is_public: + for user_id in user_ids: + if self.get_if_app_services_interested_in_user(user_id): + continue + + to_insert.add(user_id) + + if to_insert: + yield self.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + for user_id in user_ids: + if not self.hs.is_mine_id(user_id): + continue + + if self.get_if_app_services_interested_in_user(user_id): + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if to_insert: + yield self.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() + + # We've finished a room. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_rooms", + progress, + ) + + defer.returnValue(len(rooms_to_work_on)) + + @defer.inlineCallbacks + def _populate_user_directory_process_users(self, progress, batch_size): + """ + If search_all_users is enabled, add all of the users to the user directory. + """ + if not self.hs.config.user_directory_search_all_users: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + def _get_next_batch(txn): + sql = "SELECT user_id FROM %s LIMIT %s" % ( + TEMP_TABLE + "_users", + str(batch_size), + ) + txn.execute(sql) + users_to_work_on = txn.fetchall() + + if not users_to_work_on: + return None + + users_to_work_on = [x[0] for x in users_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users" + txn.execute(sql) + progress["remaining"] = txn.fetchone()[0] + + return users_to_work_on + + users_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more users -- complete the transaction. + if not users_to_work_on: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + logger.info( + "Processing the next %d users of %d remaining" + % (len(users_to_work_on), progress["remaining"]) + ) + + for user_id in users_to_work_on: + profile = yield self.get_profileinfo(get_localpart_from_id(user_id)) + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + # We've finished processing a user. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_users", + progress, + ) + + defer.returnValue(len(users_to_work_on)) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -63,106 +329,16 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - @defer.inlineCallbacks - def add_users_to_public_room(self, room_id, user_ids): - """Add user to the list of users in public rooms - - Args: - room_id (str): A room_id that all users are in that is world_readable - or publically joinable - user_ids (list(str)): Users to add + def update_profile_in_user_dir(self, user_id, display_name, avatar_url): """ - yield self._simple_insert_many( - table="users_in_public_rooms", - values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], - desc="add_users_to_public_room", - ) - for user_id in user_ids: - self.get_user_in_public_room.invalidate((user_id,)) - - def add_profiles_to_user_dir(self, room_id, users_with_profile): - """Add profiles to the user directory - - Args: - room_id (str): A room_id that all users are joined to - users_with_profile (dict): Users to add to directory in the form of - mapping of user_id -> ProfileInfo + Update or add a user's profile in the user directory. """ - if isinstance(self.database_engine, PostgresEngine): - # We weight the loclpart most highly, then display name and finally - # server name - sql = """ - INSERT INTO user_directory_search(user_id, vector) - VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - ) - """ - args = ( - ( - user_id, - get_localpart_from_id(user_id), - get_domain_from_id(user_id), - profile.display_name, - ) - for user_id, profile in iteritems(users_with_profile) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = """ - INSERT INTO user_directory_search(user_id, value) - VALUES (?,?) - """ - args = ( - ( - user_id, - "%s %s" % (user_id, p.display_name) if p.display_name else user_id, - ) - for user_id, p in iteritems(users_with_profile) - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - def _add_profiles_to_user_dir_txn(txn): - txn.executemany(sql, args) - self._simple_insert_many_txn( - txn, - table="user_directory", - values=[ - { - "user_id": user_id, - "room_id": room_id, - "display_name": profile.display_name, - "avatar_url": profile.avatar_url, - } - for user_id, profile in iteritems(users_with_profile) - ], - ) - for user_id in users_with_profile: - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction( - "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn - ) - - @defer.inlineCallbacks - def update_user_in_user_dir(self, user_id, room_id): - yield self._simple_update_one( - table="user_directory", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_user_dir", - ) - self.get_user_in_directory.invalidate((user_id,)) - def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, - insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) @@ -250,16 +426,6 @@ class UserDirectoryStore(SQLBaseStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - @defer.inlineCallbacks - def update_user_in_public_user_list(self, user_id, room_id): - yield self._simple_update_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_public_user_list", - ) - self.get_user_in_public_room.invalidate((user_id,)) - def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( @@ -271,76 +437,45 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, + ) txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) @defer.inlineCallbacks - def remove_from_user_in_public_room(self, user_id): - yield self._simple_delete( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - desc="remove_from_user_in_public_room", - ) - self.get_user_in_public_room.invalidate((user_id,)) - - def get_users_in_public_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - return self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_public_due_to_room", - ) - - @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory because they're in the given room_id """ - user_ids_dir = yield self._simple_select_onecol( - table="user_directory", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_pub = yield self._simple_select_onecol( + user_ids_share_pub = yield self._simple_select_onecol( table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) - user_ids_share = yield self._simple_select_onecol( - table="users_who_share_rooms", + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids = set(user_ids_dir) - user_ids.update(user_ids_pub) - user_ids.update(user_ids_share) + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) defer.returnValue(user_ids) @defer.inlineCallbacks - def get_all_rooms(self): - """Get all room_ids we've ever known about, in ascending order of "size" - """ - sql = """ - SELECT room_id FROM current_state_events - GROUP BY room_id - ORDER BY count(*) ASC - """ - rows = yield self._execute("get_all_rooms", None, sql) - defer.returnValue([room_id for room_id, in rows]) - - @defer.inlineCallbacks def get_all_local_users(self): """Get all local users """ @@ -350,155 +485,116 @@ class UserDirectoryStore(SQLBaseStore): rows = yield self._execute("get_all_local_users", None, sql) defer.returnValue([name for name, in rows]) - def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_rooms table. The first + def add_users_who_share_private_room(self, room_id, user_id_tuples): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): - self._simple_insert_many_txn( + self._simple_upsert_many_txn( txn, - table="users_who_share_rooms", - values=[ - { - "user_id": user_id, - "other_user_id": other_user_id, - "room_id": room_id, - "share_private": share_private, - } + table="users_who_share_private_rooms", + key_names=["user_id", "other_user_id", "room_id"], + key_values=[ + (user_id, other_user_id, room_id) for user_id, other_user_id in user_id_tuples ], + value_names=(), + value_values=None, ) - for user_id, other_user_id in user_id_tuples: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) - def update_users_who_share_room(self, room_id, share_private, user_id_sets): - """Updates entries in the users_who_share_rooms table. The first + def add_users_in_public_rooms(self, room_id, user_ids): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. + user_ids (list[str]) """ - def _update_users_who_share_room_txn(txn): - sql = """ - UPDATE users_who_share_rooms - SET room_id = ?, share_private = ? - WHERE user_id = ? AND other_user_id = ? - """ - txn.executemany( - sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) + def _add_users_in_public_rooms_txn(txn): + + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id", "room_id"], + key_values=[(user_id, room_id) for user_id in user_ids], + value_names=(), + value_values=None, ) - for user_id, other_user_id in user_id_sets: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( - "update_users_who_share_room", _update_users_who_share_room_txn + "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) - def remove_user_who_share_room(self, user_id, other_user_id): - """Deletes entries in the users_who_share_rooms table. The first + def remove_user_who_share_room(self, user_id, room_id): + """ + Deletes entries in the users_who_share_*_rooms table. The first user should be a local user. Args: + user_id (str) room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) + self._simple_delete_txn( + txn, + table="users_in_public_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cached(max_entries=500000) - def get_if_users_share_a_room(self, user_id, other_user_id): - """Gets if users share a room. - - Args: - user_id (str): Must be a local user_id - other_user_id (str) - - Returns: - bool|None: None if they don't share a room, otherwise whether they - share a private room or not. + @defer.inlineCallbacks + def get_user_dir_rooms_user_is_in(self, user_id): """ - return self._simple_select_one_onecol( - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, - retcol="share_private", - allow_none=True, - desc="get_if_users_share_a_room", - ) - - @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_users_who_share_room_from_dir(self, user_id): - """Returns the set of users who share a room with `user_id` + Returns the rooms that a user is in. Args: user_id(str): Must be a local user Returns: - dict: user_id -> share_private mapping + list: user_id """ - rows = yield self._simple_select_list( - table="users_who_share_rooms", + rows = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"user_id": user_id}, - retcols=("other_user_id", "share_private"), - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) - defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) - - def get_users_in_share_dir_with_room_id(self, user_id, room_id): - """Get all user tuples that are in the users_who_share_rooms due to the - given room_id. - - Returns: - [(user_id, other_user_id)]: where one of the two will match the given - user_id. - """ - sql = """ - SELECT user_id, other_user_id FROM users_who_share_rooms - WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) - """ - return self._execute( - "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id + pub_rows = yield self._simple_select_onecol( + table="users_in_public_rooms", + keyvalues={"user_id": user_id}, + retcol="room_id", + desc="get_rooms_user_is_in", ) + users = set(pub_rows) + users.update(rows) + defer.returnValue(list(users)) + @defer.inlineCallbacks def get_rooms_in_common_for_users(self, user_id, other_user_id): """Given two user_ids find out the list of rooms they share. @@ -533,11 +629,8 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_user_in_public_room.invalidate_all) - txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) - txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -548,21 +641,11 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url"), + retcols=("display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) - @cached() - def get_user_in_public_room(self, user_id): - return self._simple_select_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcols=("room_id",), - allow_none=True, - desc="get_user_in_public_room", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -652,22 +735,19 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # make s.user_id null to keep the ordering algorithm happy - join_clause = """ - CROSS JOIN (SELECT NULL as user_id) AS s - """ - join_args = () - where_clause = "1=1" + join_args = (user_id,) + where_clause = "user_id != ?" else: - join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? AND share_private - ) AS s USING (user_id) - """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = """ + ( + EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id) + OR EXISTS ( + SELECT 1 FROM users_who_share_private_rooms + WHERE user_id = ? AND other_user_id = t.user_id + ) + ) + """ if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -679,14 +759,13 @@ class UserDirectoryStore(SQLBaseStore): # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND vector @@ to_tsquery('english', ?) ORDER BY - (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) * ( @@ -708,7 +787,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (full_query, exact_query, prefix_query, limit + 1) @@ -717,9 +795,8 @@ class UserDirectoryStore(SQLBaseStore): sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND value MATCH ? @@ -729,7 +806,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (search_query, limit + 1) diff --git a/synapse/types.py b/synapse/types.py index d8cb64addb..3de94b6335 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -16,6 +16,8 @@ import re import string from collections import namedtuple +import attr + from synapse.api.errors import SynapseError @@ -455,3 +457,13 @@ class ThirdPartyInstanceID( @classmethod def create(cls, appservice_id, network_id,): return cls(appservice_id=appservice_id, network_id=network_id) + + +@attr.s(slots=True) +class ReadReceipt(object): + """Information about a read-receipt""" + room_id = attr.ib() + receipt_type = attr.ib() + user_id = attr.ib() + event_ids = attr.ib() + data = attr.ib() diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c919..16c40cd74c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -67,6 +67,10 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, Returns: Deferred[list[synapse.events.EventBase]] """ + # Filter out events that have been soft failed so that we don't relay them + # to clients. + events = list(e for e in events if not e.internal_metadata.is_soft_failed()) + types = ( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), @@ -216,28 +220,36 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): + """Filter a list of events based on whether given server is allowed to + see them. - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. + + Returns + Deferred[list[FrozenEvent]] + """ + + def is_sender_erased(event, erased_senders): + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -259,17 +271,17 @@ def filter_events_for_server(store, server_name, events): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - return prune_event(event) + return False - return event + return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( @@ -296,16 +308,31 @@ def filter_events_for_server(store, server_name, events): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -361,7 +388,13 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index 8933fe3b72..30a255d441 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -6,34 +6,34 @@ from tests import unittest class TestRatelimiter(unittest.TestCase): def test_allowed(self): limiter = Ratelimiter() - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=5, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=5, rate_hz=0.1, burst_count=1 ) self.assertFalse(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(20., time_allowed) def test_pruning(self): limiter = Ratelimiter() - allowed, time_allowed = limiter.send_message( - user_id="test_id_1", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_1", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertIn("test_id_1", limiter.message_counts) - allowed, time_allowed = limiter.send_message( - user_id="test_id_2", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_2", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertNotIn("test_id_1", limiter.message_counts) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 80da1c8954..d60c124eec 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -55,11 +55,11 @@ class ProfileTestCase(unittest.TestCase): federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 36e136cded..13486930fb 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -24,13 +24,17 @@ from synapse.api.errors import AuthError from synapse.types import UserID from tests import unittest +from tests.utils import register_federation_servlets -from ..utils import ( - DeferredMockCallable, - MockClock, - MockHttpResource, - setup_test_homeserver, -) +# Some local users to test with +U_APPLE = UserID.from_string("@apple:test") +U_BANANA = UserID.from_string("@banana:test") + +# Remote user +U_ONION = UserID.from_string("@onion:farm") + +# Test room id +ROOM_ID = "a-room" def _expect_edu_transaction(edu_type, content, origin="test"): @@ -46,30 +50,21 @@ def _make_edu_transaction_json(edu_type, content): return json.dumps(_expect_edu_transaction(edu_type, content)).encode('utf8') -class TypingNotificationsTestCase(unittest.TestCase): - """Tests typing notifications to rooms.""" - - @defer.inlineCallbacks - def setUp(self): - self.clock = MockClock() +class TypingNotificationsTestCase(unittest.HomeserverTestCase): + servlets = [register_federation_servlets] - self.mock_http_client = Mock(spec=[]) - self.mock_http_client.put_json = DeferredMockCallable() + def make_homeserver(self, reactor, clock): + # we mock out the keyring so as to skip the authentication check on the + # federation API call. + mock_keyring = Mock(spec=["verify_json_for_server"]) + mock_keyring.verify_json_for_server.return_value = defer.succeed(True) - self.mock_federation_resource = MockHttpResource() - - mock_notifier = Mock() - self.on_new_event = mock_notifier.on_new_event + # we mock out the federation client too + mock_federation_client = Mock(spec=["put_json"]) + mock_federation_client.put_json.return_value = defer.succeed((200, "OK")) - self.auth = Mock(spec=[]) - self.state_handler = Mock() - - hs = yield setup_test_homeserver( - self.addCleanup, - "test", - auth=self.auth, - clock=self.clock, - datastore=Mock( + hs = self.setup_test_homeserver( + datastore=(Mock( spec=[ # Bits that Federation needs "prep_send_transaction", @@ -82,16 +77,21 @@ class TypingNotificationsTestCase(unittest.TestCase): "get_user_directory_stream_pos", "get_current_state_deltas", ] - ), - state_handler=self.state_handler, - handlers=Mock(), - notifier=mock_notifier, - resource_for_client=Mock(), - resource_for_federation=self.mock_federation_resource, - http_client=self.mock_http_client, - keyring=Mock(), + )), + notifier=Mock(), + http_client=mock_federation_client, + keyring=mock_keyring, ) + return hs + + def prepare(self, reactor, clock, hs): + # the tests assume that we are starting at unix time 1000 + reactor.pump((1000, )) + + mock_notifier = hs.get_notifier() + self.on_new_event = mock_notifier.on_new_event + self.handler = hs.get_typing_handler() self.event_source = hs.get_event_sources().sources["typing"] @@ -109,13 +109,12 @@ class TypingNotificationsTestCase(unittest.TestCase): self.datastore.get_received_txn_response = get_received_txn_response - self.room_id = "a-room" - self.room_members = [] def check_joined_room(room_id, user_id): if user_id not in [u.to_string() for u in self.room_members]: raise AuthError(401, "User is not in the room") + hs.get_auth().check_joined_room = check_joined_room def get_joined_hosts_for_room(room_id): return set(member.domain for member in self.room_members) @@ -124,8 +123,7 @@ class TypingNotificationsTestCase(unittest.TestCase): def get_current_user_in_room(room_id): return set(str(u) for u in self.room_members) - - self.state_handler.get_current_user_in_room = get_current_user_in_room + hs.get_state_handler().get_current_user_in_room = get_current_user_in_room self.datastore.get_user_directory_stream_pos.return_value = ( # we deliberately return a non-None stream pos to avoid doing an initial_spam @@ -134,230 +132,208 @@ class TypingNotificationsTestCase(unittest.TestCase): self.datastore.get_current_state_deltas.return_value = None - self.auth.check_joined_room = check_joined_room - self.datastore.get_to_device_stream_token = lambda: 0 self.datastore.get_new_device_msgs_for_remote = lambda *args, **kargs: ([], 0) self.datastore.delete_device_msgs_for_remote = lambda *args, **kargs: None - # Some local users to test with - self.u_apple = UserID.from_string("@apple:test") - self.u_banana = UserID.from_string("@banana:test") - - # Remote user - self.u_onion = UserID.from_string("@onion:farm") - - @defer.inlineCallbacks def test_started_typing_local(self): - self.room_members = [self.u_apple, self.u_banana] + self.room_members = [U_APPLE, U_BANANA] self.assertEquals(self.event_source.get_current_key(), 0) - yield self.handler.started_typing( - target_user=self.u_apple, - auth_user=self.u_apple, - room_id=self.room_id, + self.successResultOf(self.handler.started_typing( + target_user=U_APPLE, + auth_user=U_APPLE, + room_id=ROOM_ID, timeout=20000, - ) + )) self.on_new_event.assert_has_calls( - [call('typing_key', 1, rooms=[self.room_id])] + [call('typing_key', 1, rooms=[ROOM_ID])] ) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=0 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=0 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, - "content": {"user_ids": [self.u_apple.to_string()]}, + "room_id": ROOM_ID, + "content": {"user_ids": [U_APPLE.to_string()]}, } ], ) - @defer.inlineCallbacks def test_started_typing_remote_send(self): - self.room_members = [self.u_apple, self.u_onion] - - put_json = self.mock_http_client.put_json - put_json.expect_call_and_return( - call( - "farm", - path="/_matrix/federation/v1/send/1000000/", - data=_expect_edu_transaction( - "m.typing", - content={ - "room_id": self.room_id, - "user_id": self.u_apple.to_string(), - "typing": True, - }, - ), - json_data_callback=ANY, - long_retries=True, - backoff_on_404=True, - ), - defer.succeed((200, "OK")), - ) + self.room_members = [U_APPLE, U_ONION] - yield self.handler.started_typing( - target_user=self.u_apple, - auth_user=self.u_apple, - room_id=self.room_id, + self.successResultOf(self.handler.started_typing( + target_user=U_APPLE, + auth_user=U_APPLE, + room_id=ROOM_ID, timeout=20000, - ) + )) - yield put_json.await_calls() + put_json = self.hs.get_http_client().put_json + put_json.assert_called_once_with( + "farm", + path="/_matrix/federation/v1/send/1000000/", + data=_expect_edu_transaction( + "m.typing", + content={ + "room_id": ROOM_ID, + "user_id": U_APPLE.to_string(), + "typing": True, + }, + ), + json_data_callback=ANY, + long_retries=True, + backoff_on_404=True, + ) - @defer.inlineCallbacks def test_started_typing_remote_recv(self): - self.room_members = [self.u_apple, self.u_onion] + self.room_members = [U_APPLE, U_ONION] self.assertEquals(self.event_source.get_current_key(), 0) - (code, response) = yield self.mock_federation_resource.trigger( + (request, channel) = self.make_request( "PUT", "/_matrix/federation/v1/send/1000000/", _make_edu_transaction_json( "m.typing", content={ - "room_id": self.room_id, - "user_id": self.u_onion.to_string(), + "room_id": ROOM_ID, + "user_id": U_ONION.to_string(), "typing": True, }, ), federation_auth_origin=b'farm', ) + self.render(request) + self.assertEqual(channel.code, 200) self.on_new_event.assert_has_calls( - [call('typing_key', 1, rooms=[self.room_id])] + [call('typing_key', 1, rooms=[ROOM_ID])] ) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=0 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=0 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, - "content": {"user_ids": [self.u_onion.to_string()]}, + "room_id": ROOM_ID, + "content": {"user_ids": [U_ONION.to_string()]}, } ], ) - @defer.inlineCallbacks def test_stopped_typing(self): - self.room_members = [self.u_apple, self.u_banana, self.u_onion] - - put_json = self.mock_http_client.put_json - put_json.expect_call_and_return( - call( - "farm", - path="/_matrix/federation/v1/send/1000000/", - data=_expect_edu_transaction( - "m.typing", - content={ - "room_id": self.room_id, - "user_id": self.u_apple.to_string(), - "typing": False, - }, - ), - json_data_callback=ANY, - long_retries=True, - backoff_on_404=True, - ), - defer.succeed((200, "OK")), - ) + self.room_members = [U_APPLE, U_BANANA, U_ONION] # Gut-wrenching from synapse.handlers.typing import RoomMember - member = RoomMember(self.room_id, self.u_apple.to_string()) + member = RoomMember(ROOM_ID, U_APPLE.to_string()) self.handler._member_typing_until[member] = 1002000 - self.handler._room_typing[self.room_id] = set([self.u_apple.to_string()]) + self.handler._room_typing[ROOM_ID] = set([U_APPLE.to_string()]) self.assertEquals(self.event_source.get_current_key(), 0) - yield self.handler.stopped_typing( - target_user=self.u_apple, auth_user=self.u_apple, room_id=self.room_id - ) + self.successResultOf(self.handler.stopped_typing( + target_user=U_APPLE, auth_user=U_APPLE, room_id=ROOM_ID + )) self.on_new_event.assert_has_calls( - [call('typing_key', 1, rooms=[self.room_id])] + [call('typing_key', 1, rooms=[ROOM_ID])] ) - yield put_json.await_calls() + put_json = self.hs.get_http_client().put_json + put_json.assert_called_once_with( + "farm", + path="/_matrix/federation/v1/send/1000000/", + data=_expect_edu_transaction( + "m.typing", + content={ + "room_id": ROOM_ID, + "user_id": U_APPLE.to_string(), + "typing": False, + }, + ), + json_data_callback=ANY, + long_retries=True, + backoff_on_404=True, + ) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=0 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=0 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, + "room_id": ROOM_ID, "content": {"user_ids": []}, } ], ) - @defer.inlineCallbacks def test_typing_timeout(self): - self.room_members = [self.u_apple, self.u_banana] + self.room_members = [U_APPLE, U_BANANA] self.assertEquals(self.event_source.get_current_key(), 0) - yield self.handler.started_typing( - target_user=self.u_apple, - auth_user=self.u_apple, - room_id=self.room_id, + self.successResultOf(self.handler.started_typing( + target_user=U_APPLE, + auth_user=U_APPLE, + room_id=ROOM_ID, timeout=10000, - ) + )) self.on_new_event.assert_has_calls( - [call('typing_key', 1, rooms=[self.room_id])] + [call('typing_key', 1, rooms=[ROOM_ID])] ) self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=0 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=0 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, - "content": {"user_ids": [self.u_apple.to_string()]}, + "room_id": ROOM_ID, + "content": {"user_ids": [U_APPLE.to_string()]}, } ], ) - self.clock.advance_time(16) + self.reactor.pump([16, ]) self.on_new_event.assert_has_calls( - [call('typing_key', 2, rooms=[self.room_id])] + [call('typing_key', 2, rooms=[ROOM_ID])] ) self.assertEquals(self.event_source.get_current_key(), 2) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=1 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=1 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, + "room_id": ROOM_ID, "content": {"user_ids": []}, } ], @@ -365,29 +341,29 @@ class TypingNotificationsTestCase(unittest.TestCase): # SYN-230 - see if we can still set after timeout - yield self.handler.started_typing( - target_user=self.u_apple, - auth_user=self.u_apple, - room_id=self.room_id, + self.successResultOf(self.handler.started_typing( + target_user=U_APPLE, + auth_user=U_APPLE, + room_id=ROOM_ID, timeout=10000, - ) + )) self.on_new_event.assert_has_calls( - [call('typing_key', 3, rooms=[self.room_id])] + [call('typing_key', 3, rooms=[ROOM_ID])] ) self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 3) - events = yield self.event_source.get_new_events( - room_ids=[self.room_id], from_key=0 + events = self.event_source.get_new_events( + room_ids=[ROOM_ID], from_key=0 ) self.assertEquals( events[0], [ { "type": "m.typing", - "room_id": self.room_id, - "content": {"user_ids": [self.u_apple.to_string()]}, + "room_id": ROOM_ID, + "content": {"user_ids": [U_APPLE.to_string()]}, } ], ) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 11f2bae698..aefe11ac28 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -14,78 +14,306 @@ # limitations under the License. from mock import Mock -from twisted.internet import defer - from synapse.api.constants import UserTypes -from synapse.handlers.user_directory import UserDirectoryHandler +from synapse.rest.client.v1 import admin, login, room from synapse.storage.roommember import ProfileInfo from tests import unittest -from tests.utils import setup_test_homeserver -class UserDirectoryHandlers(object): - def __init__(self, hs): - self.user_directory_handler = UserDirectoryHandler(hs) +class UserDirectoryTestCase(unittest.HomeserverTestCase): + """ + Tests the UserDirectoryHandler. + """ + servlets = [ + login.register_servlets, + admin.register_servlets, + room.register_servlets, + ] -class UserDirectoryTestCase(unittest.TestCase): - """ Tests the UserDirectoryHandler. """ + def make_homeserver(self, reactor, clock): - @defer.inlineCallbacks - def setUp(self): - hs = yield setup_test_homeserver(self.addCleanup) - self.store = hs.get_datastore() - hs.handlers = UserDirectoryHandlers(hs) + config = self.default_config() + config.update_user_directory = True + return self.setup_test_homeserver(config=config) - self.handler = hs.get_handlers().user_directory_handler + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.handler = hs.get_user_directory_handler() - @defer.inlineCallbacks def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" - yield self.store.register( - user_id=support_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=support_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) - yield self.handler.handle_local_profile_change(support_user_id, None) - profile = yield self.store.get_user_in_directory(support_user_id) + self.get_success( + self.handler.handle_local_profile_change(support_user_id, None) + ) + profile = self.get_success(self.store.get_user_in_directory(support_user_id)) self.assertTrue(profile is None) display_name = 'display_name' - profile_info = ProfileInfo( - avatar_url='avatar_url', - display_name=display_name, - ) + profile_info = ProfileInfo(avatar_url='avatar_url', display_name=display_name) regular_user_id = '@regular:test' - yield self.handler.handle_local_profile_change(regular_user_id, profile_info) - profile = yield self.store.get_user_in_directory(regular_user_id) + self.get_success( + self.handler.handle_local_profile_change(regular_user_id, profile_info) + ) + profile = self.get_success(self.store.get_user_in_directory(regular_user_id)) self.assertTrue(profile['display_name'] == display_name) - @defer.inlineCallbacks def test_handle_user_deactivated_support_user(self): s_user_id = "@support:test" - self.store.register( - user_id=s_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=s_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) self.store.remove_from_user_dir = Mock() self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(s_user_id) + self.get_success(self.handler.handle_user_deactivated(s_user_id)) self.store.remove_from_user_dir.not_called() self.store.remove_from_user_in_public_room.not_called() - @defer.inlineCallbacks def test_handle_user_deactivated_regular_user(self): r_user_id = "@regular:test" - self.store.register(user_id=r_user_id, token="123", password_hash=None) + self.get_success( + self.store.register(user_id=r_user_id, token="123", password_hash=None) + ) self.store.remove_from_user_dir = Mock() - self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(r_user_id) + self.get_success(self.handler.handle_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) - self.store.remove_from_user_in_public_room.assert_called_once_with(r_user_id) + + def test_private_room(self): + """ + A user can be searched for only by people that are either in a public + room, or that share a private chat. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + self.assertEqual(public_users, []) + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # We get NO search results when searching for user2 by user3. + s = self.get_success(self.handler.search_users(u3, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + # We get NO search results when searching for user3 by user1. + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + # User 2 then leaves. + self.helper.leave(room, user=u2, tok=u2_token) + + # Check we have removed the values. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual(self._compress_shared(shares_private), set()) + self.assertEqual(public_users, []) + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + def _compress_shared(self, shared): + """ + Compress a list of users who share rooms dicts to a list of tuples. + """ + r = set() + for i in shared: + r.add((i["user_id"], i["other_user_id"], i["room_id"])) + return r + + def get_users_in_public_rooms(self): + r = self.get_success( + self.store._simple_select_list( + "users_in_public_rooms", None, ("user_id", "room_id") + ) + ) + retval = [] + for i in r: + retval.append((i["user_id"], i["room_id"])) + return retval + + def get_users_who_share_private_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_who_share_private_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ) + ) + + def _add_background_updates(self): + """ + Add the background updates we need to run. + """ + # Ugh, have to reset this flag + self.store._all_done = False + + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_createtables", + "progress_json": "{}", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_rooms", + "progress_json": "{}", + "depends_on": "populate_user_directory_createtables", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_users", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_rooms", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_cleanup", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_users", + }, + ) + ) + + def test_initial(self): + """ + The user directory's initial handler correctly updates the search tables. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + u3_token = self.login(u3, "pass") + + room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + private_room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token) + self.helper.join(private_room, user=u3, tok=u3_token) + + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + # Nothing updated yet + self.assertEqual(shares_private, []) + self.assertEqual(public_users, []) + + # Do the initial population of the user directory via the background update + self._add_background_updates() + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + # User 1 and User 2 are in the same public room + self.assertEqual(set(public_users), set([(u1, room), (u2, room)])) + + # User 1 and User 3 share private rooms + self.assertEqual( + self._compress_shared(shares_private), + set([(u1, u3, private_room), (u3, u1, private_room)]), + ) + + def test_initial_share_all_users(self): + """ + Search all users = True means that a user does not have to share a + private room with the searching user or be in a public room to be search + visible. + """ + self.handler.search_all_users = True + self.hs.config.user_directory_search_all_users = True + + u1 = self.register_user("user1", "pass") + self.register_user("user2", "pass") + u3 = self.register_user("user3", "pass") + + # Wipe the user dir + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + # Do the initial population of the user directory via the background update + self._add_background_updates() + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + # No users share rooms + self.assertEqual(public_users, []) + self.assertEqual(self._compress_shared(shares_private), set([])) + + # Despite not sharing a room, search_all_users means we get a search + # result. + s = self.get_success(self.handler.search_users(u1, u3, 10)) + self.assertEqual(len(s["results"]), 1) + + # We can find the other two users + s = self.get_success(self.handler.search_users(u1, "user", 10)) + self.assertEqual(len(s["results"]), 2) + + # Registering a user and then searching for them works. + u4 = self.register_user("user4", "pass") + s = self.get_success(self.handler.search_users(u1, u4, 10)) + self.assertEqual(len(s["results"]), 1) diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 9e9fbbfe93..524af4f8d1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,10 +31,10 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( "blue", federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_ratelimiter().send_message.return_value = (True, 0) + hs.get_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_admin.py b/tests/rest/client/v1/test_admin.py index 407bf0ac4c..ea03b7e523 100644 --- a/tests/rest/client/v1/test_admin.py +++ b/tests/rest/client/v1/test_admin.py @@ -20,14 +20,48 @@ import json from mock import Mock from synapse.api.constants import UserTypes -from synapse.rest.client.v1.admin import register_servlets +from synapse.rest.client.v1 import admin, login from tests import unittest +class VersionTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + ] + + url = '/_matrix/client/r0/admin/server_version' + + def test_version_string(self): + self.register_user("admin", "pass", admin=True) + self.admin_token = self.login("admin", "pass") + + request, channel = self.make_request("GET", self.url, + access_token=self.admin_token) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), + msg=channel.result["body"]) + self.assertEqual({'server_version', 'python_version'}, + set(channel.json_body.keys())) + + def test_inaccessible_to_non_admins(self): + self.register_user("unprivileged-user", "pass", admin=False) + user_token = self.login("unprivileged-user", "pass") + + request, channel = self.make_request("GET", self.url, + access_token=user_token) + self.render(request) + + self.assertEqual(403, int(channel.result['code']), + msg=channel.result['body']) + + class UserRegisterTestCase(unittest.HomeserverTestCase): - servlets = [register_servlets] + servlets = [admin.register_servlets] def make_homeserver(self, reactor, clock): diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 483bebc832..36d8547275 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -40,10 +40,10 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): config.auto_join_rooms = [] hs = self.setup_test_homeserver( - config=config, ratelimiter=NonCallableMock(spec_set=["send_message"]) + config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py new file mode 100644 index 0000000000..86312f1096 --- /dev/null +++ b/tests/rest/client/v1/test_login.py @@ -0,0 +1,163 @@ +import json + +from synapse.rest.client.v1 import admin, login + +from tests import unittest + +LOGIN_URL = b"/_matrix/client/r0/login" + + +class LoginRestServletTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + + self.hs = self.setup_test_homeserver() + self.hs.config.enable_registration = True + self.hs.config.registrations_require_3pid = [] + self.hs.config.auto_join_rooms = [] + self.hs.config.enable_registration_captcha = False + + return self.hs + + def test_POST_ratelimiting_per_address(self): + self.hs.config.rc_login_address.burst_count = 5 + self.hs.config.rc_login_address.per_second = 0.17 + + # Create different users so we're sure not to be bothered by the per-user + # ratelimiter. + for i in range(0, 6): + self.register_user("kermit" + str(i), "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account(self): + self.hs.config.rc_login_account.burst_count = 5 + self.hs.config.rc_login_account.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account_failed_attempts(self): + self.hs.config.rc_login_failed_attempts.burst_count = 5 + self.hs.config.rc_login_failed_attempts.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"403", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"403", channel.result) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index a824be9a62..015c144248 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -41,10 +41,10 @@ class RoomBase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = self.hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.hs.get_federation_handler = Mock(return_value=Mock()) @@ -96,7 +96,7 @@ class RoomPermissionsTestCase(RoomBase): # auth as user_id now self.helper.auth_user_id = self.user_id - def test_send_message(self): + def test_can_do_action(self): msg_content = b'{"msgtype":"m.text","body":"hello"}' seq = iter(range(100)) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 0ad814c5e5..30fb77bac8 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -42,13 +42,13 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.event_source = hs.get_event_sources().sources["typing"] self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 906b348d3e..8fb525d3bf 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -130,3 +130,53 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Guest access is disabled") + + def test_POST_ratelimiting_guest(self): + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 + + for i in range(0, 6): + url = self.url + b"?kind=guest" + request, channel = self.make_request(b"POST", url, b"{}") + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting(self): + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 + + for i in range(0, 6): + params = { + "username": "kermit" + str(i), + "password": "monkey", + "device_id": "frogfone", + "auth": {"type": LoginType.DUMMY}, + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) diff --git a/tests/rest/media/v1/test_base.py b/tests/rest/media/v1/test_base.py new file mode 100644 index 0000000000..af8f74eb42 --- /dev/null +++ b/tests/rest/media/v1/test_base.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.rest.media.v1._base import get_filename_from_headers + +from tests import unittest + + +class GetFileNameFromHeadersTests(unittest.TestCase): + # input -> expected result + TEST_CASES = { + b"inline; filename=abc.txt": u"abc.txt", + b'inline; filename="azerty"': u"azerty", + b'inline; filename="aze%20rty"': u"aze%20rty", + b'inline; filename="aze\"rty"': u'aze"rty', + b'inline; filename="azer;ty"': u"azer;ty", + + b"inline; filename*=utf-8''foo%C2%A3bar": u"foo£bar", + } + + def tests(self): + for hdr, expected in self.TEST_CASES.items(): + res = get_filename_from_headers( + { + b'Content-Disposition': [hdr], + }, + ) + self.assertEqual( + res, expected, + "expected output for %s to be %s but was %s" % ( + hdr, expected, res, + ) + ) diff --git a/tests/server.py b/tests/server.py index fc1e76d146..37069afdda 100644 --- a/tests/server.py +++ b/tests/server.py @@ -137,6 +137,7 @@ def make_request( access_token=None, request=SynapseRequest, shorthand=True, + federation_auth_origin=None, ): """ Make a web request using the given method and path, feed it the @@ -150,9 +151,11 @@ def make_request( a dict. shorthand: Whether to try and be helpful and prefix the given URL with the usual REST API path, if it doesn't contain it. + federation_auth_origin (bytes|None): if set to not-None, we will add a fake + Authorization header pretenting to be the given server name. Returns: - A synapse.http.site.SynapseRequest. + Tuple[synapse.http.site.SynapseRequest, channel] """ if not isinstance(method, bytes): method = method.encode('ascii') @@ -184,6 +187,11 @@ def make_request( b"Authorization", b"Bearer " + access_token.encode('ascii') ) + if federation_auth_origin is not None: + req.requestHeaders.addRawHeader( + b"Authorization", b"X-Matrix origin=%s,key=,sig=" % (federation_auth_origin,) + ) + if content: req.requestHeaders.addRawHeader(b"Content-Type", b"application/json") @@ -288,9 +296,6 @@ def setup_test_homeserver(cleanup_func, *args, **kwargs): **kwargs ) - pool.runWithConnection = runWithConnection - pool.runInteraction = runInteraction - class ThreadPool: """ Threadless thread pool. @@ -316,8 +321,12 @@ def setup_test_homeserver(cleanup_func, *args, **kwargs): return d clock.threadpool = ThreadPool() - pool.threadpool = ThreadPool() - pool.running = True + + if pool: + pool.runWithConnection = runWithConnection + pool.runInteraction = runInteraction + pool.threadpool = ThreadPool() + pool.running = True return d diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 0dde1ab2fe..fd3361404f 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -16,7 +16,6 @@ from twisted.internet import defer from synapse.storage import UserDirectoryStore -from synapse.storage.roommember import ProfileInfo from tests import unittest from tests.utils import setup_test_homeserver @@ -34,17 +33,11 @@ class UserDirectoryStoreTestCase(unittest.TestCase): # alice and bob are both in !room_id. bobby is not but shares # a homeserver with alice. - yield self.store.add_profiles_to_user_dir( - "!room:id", - { - ALICE: ProfileInfo(None, "alice"), - BOB: ProfileInfo(None, "bob"), - BOBBY: ProfileInfo(None, "bobby"), - }, - ) - yield self.store.add_users_to_public_room("!room:id", [ALICE, BOB]) - yield self.store.add_users_who_share_room( - "!room:id", False, ((ALICE, BOB), (BOB, ALICE)) + yield self.store.update_profile_in_user_dir(ALICE, "alice", None) + yield self.store.update_profile_in_user_dir(BOB, "bob", None) + yield self.store.update_profile_in_user_dir(BOBBY, "bobby", None) + yield self.store.add_users_in_public_rooms( + "!room:id", (ALICE, BOB) ) @defer.inlineCallbacks diff --git a/tests/test_mau.py b/tests/test_mau.py index 04f95c942f..00be1a8c21 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -17,7 +17,7 @@ import json -from mock import Mock, NonCallableMock +from mock import Mock from synapse.api.constants import LoginType from synapse.api.errors import Codes, HttpResponseException, SynapseError @@ -36,7 +36,6 @@ class TestMauLimit(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.store = self.hs.get_datastore() diff --git a/tests/unittest.py b/tests/unittest.py index fac254ff10..7772a47078 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -262,6 +262,7 @@ class HomeserverTestCase(TestCase): access_token=None, request=SynapseRequest, shorthand=True, + federation_auth_origin=None, ): """ Create a SynapseRequest at the path using the method and containing the @@ -275,15 +276,18 @@ class HomeserverTestCase(TestCase): a dict. shorthand: Whether to try and be helpful and prefix the given URL with the usual REST API path, if it doesn't contain it. + federation_auth_origin (bytes|None): if set to not-None, we will add a fake + Authorization header pretenting to be the given server name. Returns: - A synapse.http.site.SynapseRequest. + Tuple[synapse.http.site.SynapseRequest, channel] """ if isinstance(content, dict): content = json.dumps(content).encode('utf8') return make_request( - self.reactor, method, path, content, access_token, request, shorthand + self.reactor, method, path, content, access_token, request, shorthand, + federation_auth_origin, ) def render(self, request): @@ -326,10 +330,10 @@ class HomeserverTestCase(TestCase): """ self.reactor.pump([by] * 100) - def get_success(self, d): + def get_success(self, d, by=0.0): if not isinstance(d, Deferred): return d - self.pump() + self.pump(by=by) return self.successResultOf(d) def register_user(self, username, password, admin=False): diff --git a/tests/utils.py b/tests/utils.py index 2dfcb70a93..b58b674aa4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -29,7 +29,7 @@ from twisted.internet import defer, reactor from synapse.api.constants import EventTypes, RoomVersions from synapse.api.errors import CodeMessageException, cs_error from synapse.config.server import ServerConfig -from synapse.federation.transport import server +from synapse.federation.transport import server as federation_server from synapse.http.server import HttpServer from synapse.server import HomeServer from synapse.storage import DataStore @@ -45,7 +45,9 @@ from synapse.util.ratelimitutils import FederationRateLimiter # set this to True to run the tests against postgres instead of sqlite. USE_POSTGRES_FOR_TESTS = os.environ.get("SYNAPSE_POSTGRES", False) LEAVE_DB = os.environ.get("SYNAPSE_LEAVE_DB", False) -POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", "postgres") +POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", None) +POSTGRES_HOST = os.environ.get("SYNAPSE_POSTGRES_HOST", None) +POSTGRES_PASSWORD = os.environ.get("SYNAPSE_POSTGRES_PASSWORD", None) POSTGRES_BASE_DB = "_synapse_unit_tests_base_%s" % (os.getpid(),) @@ -58,6 +60,8 @@ def setupdb(): "args": { "database": POSTGRES_BASE_DB, "user": POSTGRES_USER, + "host": POSTGRES_HOST, + "password": POSTGRES_PASSWORD, "cp_min": 1, "cp_max": 5, }, @@ -66,7 +70,9 @@ def setupdb(): config.password_providers = [] config.database_config = pgconfig db_engine = create_engine(pgconfig) - db_conn = db_engine.module.connect(user=POSTGRES_USER) + db_conn = db_engine.module.connect( + user=POSTGRES_USER, host=POSTGRES_HOST, password=POSTGRES_PASSWORD + ) db_conn.autocommit = True cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) @@ -76,7 +82,10 @@ def setupdb(): # Set up in the db db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) cur = db_conn.cursor() _get_or_create_schema_state(cur, db_engine) @@ -86,7 +95,9 @@ def setupdb(): db_conn.close() def _cleanup(): - db_conn = db_engine.module.connect(user=POSTGRES_USER) + db_conn = db_engine.module.connect( + user=POSTGRES_USER, host=POSTGRES_HOST, password=POSTGRES_PASSWORD + ) db_conn.autocommit = True cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) @@ -104,6 +115,7 @@ def default_config(name): config.signing_key = [MockKey()] config.event_cache_size = 1 config.enable_registration = True + config.enable_registration_captcha = False config.macaroon_secret_key = "not even a little secret" config.expire_access_token = False config.server_name = name @@ -139,9 +151,20 @@ def default_config(name): config.admin_contact = None config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 + config.rc_registration.per_second = 10000 + config.rc_registration.burst_count = 10000 + config.rc_login_address.per_second = 10000 + config.rc_login_address.burst_count = 10000 + config.rc_login_account.per_second = 10000 + config.rc_login_account.burst_count = 10000 + config.rc_login_failed_attempts.per_second = 10000 + config.rc_login_failed_attempts.burst_count = 10000 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None + config.key_refresh_interval = 24 * 60 * 60 * 1000 + config.old_signing_keys = {} + config.tls_fingerprints = [] config.use_frozen_dicts = False @@ -186,6 +209,9 @@ def setup_test_homeserver( Args: cleanup_func : The function used to register a cleanup routine for after the test. + + Calling this method directly is deprecated: you should instead derive from + HomeserverTestCase. """ if reactor is None: from twisted.internet import reactor @@ -203,7 +229,14 @@ def setup_test_homeserver( config.database_config = { "name": "psycopg2", - "args": {"database": test_db, "cp_min": 1, "cp_max": 5}, + "args": { + "database": test_db, + "host": POSTGRES_HOST, + "password": POSTGRES_PASSWORD, + "user": POSTGRES_USER, + "cp_min": 1, + "cp_max": 5, + }, } else: config.database_config = { @@ -217,7 +250,10 @@ def setup_test_homeserver( # the template database we generate in setupdb() if datastore is None and isinstance(db_engine, PostgresEngine): db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) db_conn.autocommit = True cur = db_conn.cursor() @@ -267,7 +303,10 @@ def setup_test_homeserver( # Drop the test database db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) db_conn.autocommit = True cur = db_conn.cursor() @@ -298,6 +337,8 @@ def setup_test_homeserver( cleanup_func(cleanup) hs.setup() + if homeserverToUse.__name__ == "TestHomeServer": + hs.setup_master() else: hs = homeserverToUse( name, @@ -324,23 +365,27 @@ def setup_test_homeserver( fed = kargs.get("resource_for_federation", None) if fed: - server.register_servlets( - hs, - resource=fed, - authenticator=server.Authenticator(hs), - ratelimiter=FederationRateLimiter( - hs.get_clock(), - window_size=hs.config.federation_rc_window_size, - sleep_limit=hs.config.federation_rc_sleep_limit, - sleep_msec=hs.config.federation_rc_sleep_delay, - reject_limit=hs.config.federation_rc_reject_limit, - concurrent_requests=hs.config.federation_rc_concurrent, - ), - ) + register_federation_servlets(hs, fed) defer.returnValue(hs) +def register_federation_servlets(hs, resource): + federation_server.register_servlets( + hs, + resource=resource, + authenticator=federation_server.Authenticator(hs), + ratelimiter=FederationRateLimiter( + hs.get_clock(), + window_size=hs.config.federation_rc_window_size, + sleep_limit=hs.config.federation_rc_sleep_limit, + sleep_msec=hs.config.federation_rc_sleep_delay, + reject_limit=hs.config.federation_rc_reject_limit, + concurrent_requests=hs.config.federation_rc_concurrent, + ), + ) + + def get_mock_call_args(pattern_func, mock_func): """ Return the arguments the mock function was called with interpreted by the pattern functions argument list. @@ -457,6 +502,9 @@ class MockKey(object): def verify(self, message, sig): assert sig == b"\x9a\x87$" + def encode(self): + return b"<fake_encoded_key>" + class MockClock(object): now = 1000 @@ -486,7 +534,7 @@ class MockClock(object): return t def looping_call(self, function, interval): - self.loopers.append([function, interval / 1000., self.now]) + self.loopers.append([function, interval / 1000.0, self.now]) def cancel_call_later(self, timer, ignore_errs=False): if timer[2]: @@ -522,7 +570,7 @@ class MockClock(object): looped[2] = self.now def advance_time_msec(self, ms): - self.advance_time(ms / 1000.) + self.advance_time(ms / 1000.0) def time_bound_deferred(self, d, *args, **kwargs): # We don't bother timing things out for now. @@ -631,7 +679,7 @@ def create_room(hs, room_id, creator_id): "sender": creator_id, "room_id": room_id, "content": {}, - } + }, ) event, context = yield event_creation_handler.create_new_client_event(builder) diff --git a/tox.ini b/tox.ini index 14437e7334..19080a648f 100644 --- a/tox.ini +++ b/tox.ini @@ -118,6 +118,9 @@ commands = python -m towncrier.check --compare-with=origin/develop basepython = python3.6 +[testenv:check-sampleconfig] +commands = {toxinidir}/scripts-dev/generate_sample_config --check + [testenv:codecov] skip_install = True deps = |