diff --git a/changelog.d/9086.feature b/changelog.d/9086.feature
new file mode 100644
index 0000000000..3e678e24d5
--- /dev/null
+++ b/changelog.d/9086.feature
@@ -0,0 +1 @@
+Add an admin API for protecting local media from quarantine.
diff --git a/changelog.d/9093.misc b/changelog.d/9093.misc
new file mode 100644
index 0000000000..53eb8f72a8
--- /dev/null
+++ b/changelog.d/9093.misc
@@ -0,0 +1 @@
+Add type hints to media repository.
diff --git a/changelog.d/9108.bugfix b/changelog.d/9108.bugfix
new file mode 100644
index 0000000000..465ef63508
--- /dev/null
+++ b/changelog.d/9108.bugfix
@@ -0,0 +1 @@
+Fix "Unhandled error in Deferred: BodyExceededMaxSize" errors when .well-known files that are too large.
diff --git a/changelog.d/9110.feature b/changelog.d/9110.feature
new file mode 100644
index 0000000000..01a24dcf49
--- /dev/null
+++ b/changelog.d/9110.feature
@@ -0,0 +1 @@
+Add support for multiple SSO Identity Providers.
diff --git a/changelog.d/9117.bugfix b/changelog.d/9117.bugfix
new file mode 100644
index 0000000000..233a76d18b
--- /dev/null
+++ b/changelog.d/9117.bugfix
@@ -0,0 +1 @@
+Fix corruption of `pushers` data when a postgres bouncer is used.
diff --git a/changelog.d/9124.misc b/changelog.d/9124.misc
new file mode 100644
index 0000000000..346741d982
--- /dev/null
+++ b/changelog.d/9124.misc
@@ -0,0 +1 @@
+Improve efficiency of large state resolutions.
diff --git a/changelog.d/9125.misc b/changelog.d/9125.misc
new file mode 100644
index 0000000000..08459caf5a
--- /dev/null
+++ b/changelog.d/9125.misc
@@ -0,0 +1 @@
+Remove dependency on `distutils`.
diff --git a/changelog.d/9130.feature b/changelog.d/9130.feature
new file mode 100644
index 0000000000..4ec319f1f2
--- /dev/null
+++ b/changelog.d/9130.feature
@@ -0,0 +1 @@
+Add experimental support for handling and persisting to-device messages on worker processes.
diff --git a/debian/changelog b/debian/changelog
index 609436bf75..1c6308e3a2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.25.0ubuntu1) UNRELEASED; urgency=medium
+
+ * Remove dependency on `python3-distutils`.
+
+ -- Richard van der Hoff <richard@matrix.org>  Fri, 15 Jan 2021 12:44:19 +0000
+
matrix-synapse-py3 (1.25.0) stable; urgency=medium
[ Dan Callahan ]
diff --git a/debian/control b/debian/control
index b10401be43..8167a901a4 100644
--- a/debian/control
+++ b/debian/control
@@ -31,7 +31,6 @@ Pre-Depends: dpkg (>= 1.16.1)
Depends:
adduser,
debconf,
- python3-distutils|libpython3-stdlib (<< 3.6),
${misc:Depends},
${shlibs:Depends},
${synapse:pydepends},
diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md
index dfb8c5d751..90faeaaef0 100644
--- a/docs/admin_api/media_admin_api.md
+++ b/docs/admin_api/media_admin_api.md
@@ -4,6 +4,7 @@
* [Quarantining media by ID](#quarantining-media-by-id)
* [Quarantining media in a room](#quarantining-media-in-a-room)
* [Quarantining all media of a user](#quarantining-all-media-of-a-user)
+ * [Protecting media from being quarantined](#protecting-media-from-being-quarantined)
- [Delete local media](#delete-local-media)
* [Delete a specific local media](#delete-a-specific-local-media)
* [Delete local media by date or size](#delete-local-media-by-date-or-size)
@@ -123,6 +124,29 @@ The following fields are returned in the JSON response body:
* `num_quarantined`: integer - The number of media items successfully quarantined
+## Protecting media from being quarantined
+
+This API protects a single piece of local media from being quarantined using the
+above APIs. This is useful for sticker packs and other shared media which you do
+not want to get quarantined, especially when
+[quarantining media in a room](#quarantining-media-in-a-room).
+
+Request:
+
+```
+POST /_synapse/admin/v1/media/protect/<media_id>
+
+{}
+```
+
+Where `media_id` is the ID of a piece of local media, in the form `abcdefg12345...`.
+
+Response:
+
+```json
+{}
+```
+
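+For example, using `curl` (a sketch: the access token and media ID are
+placeholders, and the homeserver is assumed to be listening on
+`localhost:8008`):
+
+```
+# hypothetical values: substitute your own access token, media ID and server address
+curl --request POST --header "Authorization: Bearer <access_token>" --data '{}' \
+    'http://localhost:8008/_synapse/admin/v1/media/protect/abcdefg12345'
+```
+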
# Delete local media
This API deletes the *local* media from the disk of your own server.
This includes any local thumbnails and copies of media downloaded from
diff --git a/docs/openid.md b/docs/openid.md
index ffa4238fff..b86ae89768 100644
--- a/docs/openid.md
+++ b/docs/openid.md
@@ -42,11 +42,10 @@ as follows:
* For other installation mechanisms, see the documentation provided by the
maintainer.
-To enable the OpenID integration, you should then add an `oidc_config` section
-to your configuration file (or uncomment the `enabled: true` line in the
-existing section). See [sample_config.yaml](./sample_config.yaml) for some
-sample settings, as well as the text below for example configurations for
-specific providers.
+To enable the OpenID integration, you should then add a section to the `oidc_providers`
+setting in your configuration file (or uncomment one of the existing examples).
+See [sample_config.yaml](./sample_config.yaml) for some sample settings, as well as
+the text below for example configurations for specific providers.
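+
+For example, a minimal provider entry might look like this (a sketch with
+placeholder values; each entry must have an `idp_id` and an `idp_name`):
+
+```yaml
+# a sketch: all values below are placeholders
+oidc_providers:
+  - idp_id: my_idp
+    idp_name: "My OpenID provider"
+    issuer: "https://accounts.example.com/"
+    client_id: "provided-by-your-issuer"
+    client_secret: "provided-by-your-issuer"
+```
+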
## Sample configs
@@ -62,20 +61,21 @@ Directory (tenant) ID as it will be used in the Azure links.
Edit your Synapse config file and change the `oidc_config` section:
```yaml
-oidc_config:
- enabled: true
- issuer: "https://login.microsoftonline.com/<tenant id>/v2.0"
- client_id: "<client id>"
- client_secret: "<client secret>"
- scopes: ["openid", "profile"]
- authorization_endpoint: "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/authorize"
- token_endpoint: "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/token"
- userinfo_endpoint: "https://graph.microsoft.com/oidc/userinfo"
-
- user_mapping_provider:
- config:
- localpart_template: "{{ user.preferred_username.split('@')[0] }}"
- display_name_template: "{{ user.name }}"
+oidc_providers:
+ - idp_id: microsoft
+ idp_name: Microsoft
+ issuer: "https://login.microsoftonline.com/<tenant id>/v2.0"
+ client_id: "<client id>"
+ client_secret: "<client secret>"
+ scopes: ["openid", "profile"]
+ authorization_endpoint: "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/authorize"
+ token_endpoint: "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/token"
+ userinfo_endpoint: "https://graph.microsoft.com/oidc/userinfo"
+
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.preferred_username.split('@')[0] }}"
+ display_name_template: "{{ user.name }}"
```
### [Dex][dex-idp]
@@ -103,17 +103,18 @@ Run with `dex serve examples/config-dev.yaml`.
Synapse config:
```yaml
-oidc_config:
- enabled: true
- skip_verification: true # This is needed as Dex is served on an insecure endpoint
- issuer: "http://127.0.0.1:5556/dex"
- client_id: "synapse"
- client_secret: "secret"
- scopes: ["openid", "profile"]
- user_mapping_provider:
- config:
- localpart_template: "{{ user.name }}"
- display_name_template: "{{ user.name|capitalize }}"
+oidc_providers:
+ - idp_id: dex
+ idp_name: "My Dex server"
+ skip_verification: true # This is needed as Dex is served on an insecure endpoint
+ issuer: "http://127.0.0.1:5556/dex"
+ client_id: "synapse"
+ client_secret: "secret"
+ scopes: ["openid", "profile"]
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.name }}"
+ display_name_template: "{{ user.name|capitalize }}"
```
### [Keycloak][keycloak-idp]
@@ -152,16 +153,17 @@ Follow the [Getting Started Guide](https://www.keycloak.org/getting-started) to
8. Copy Secret
```yaml
-oidc_config:
- enabled: true
- issuer: "https://127.0.0.1:8443/auth/realms/{realm_name}"
- client_id: "synapse"
- client_secret: "copy secret generated from above"
- scopes: ["openid", "profile"]
- user_mapping_provider:
- config:
- localpart_template: "{{ user.preferred_username }}"
- display_name_template: "{{ user.name }}"
+oidc_providers:
+ - idp_id: keycloak
+ idp_name: "My KeyCloak server"
+ issuer: "https://127.0.0.1:8443/auth/realms/{realm_name}"
+ client_id: "synapse"
+ client_secret: "copy secret generated from above"
+ scopes: ["openid", "profile"]
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.preferred_username }}"
+ display_name_template: "{{ user.name }}"
```
### [Auth0][auth0]
@@ -191,16 +193,17 @@ oidc_config:
Synapse config:
```yaml
-oidc_config:
- enabled: true
- issuer: "https://your-tier.eu.auth0.com/" # TO BE FILLED
- client_id: "your-client-id" # TO BE FILLED
- client_secret: "your-client-secret" # TO BE FILLED
- scopes: ["openid", "profile"]
- user_mapping_provider:
- config:
- localpart_template: "{{ user.preferred_username }}"
- display_name_template: "{{ user.name }}"
+oidc_providers:
+ - idp_id: auth0
+ idp_name: Auth0
+ issuer: "https://your-tier.eu.auth0.com/" # TO BE FILLED
+ client_id: "your-client-id" # TO BE FILLED
+ client_secret: "your-client-secret" # TO BE FILLED
+ scopes: ["openid", "profile"]
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.preferred_username }}"
+ display_name_template: "{{ user.name }}"
```
### GitHub
@@ -219,21 +222,22 @@ does not return a `sub` property, an alternative `subject_claim` has to be set.
Synapse config:
```yaml
-oidc_config:
- enabled: true
- discover: false
- issuer: "https://github.com/"
- client_id: "your-client-id" # TO BE FILLED
- client_secret: "your-client-secret" # TO BE FILLED
- authorization_endpoint: "https://github.com/login/oauth/authorize"
- token_endpoint: "https://github.com/login/oauth/access_token"
- userinfo_endpoint: "https://api.github.com/user"
- scopes: ["read:user"]
- user_mapping_provider:
- config:
- subject_claim: "id"
- localpart_template: "{{ user.login }}"
- display_name_template: "{{ user.name }}"
+oidc_providers:
+ - idp_id: github
+    idp_name: GitHub
+ discover: false
+ issuer: "https://github.com/"
+ client_id: "your-client-id" # TO BE FILLED
+ client_secret: "your-client-secret" # TO BE FILLED
+ authorization_endpoint: "https://github.com/login/oauth/authorize"
+ token_endpoint: "https://github.com/login/oauth/access_token"
+ userinfo_endpoint: "https://api.github.com/user"
+ scopes: ["read:user"]
+ user_mapping_provider:
+ config:
+ subject_claim: "id"
+ localpart_template: "{{ user.login }}"
+ display_name_template: "{{ user.name }}"
```
### [Google][google-idp]
@@ -243,16 +247,17 @@ oidc_config:
2. add an "OAuth Client ID" for a Web Application under "Credentials".
3. Copy the Client ID and Client Secret, and add the following to your synapse config:
```yaml
- oidc_config:
- enabled: true
- issuer: "https://accounts.google.com/"
- client_id: "your-client-id" # TO BE FILLED
- client_secret: "your-client-secret" # TO BE FILLED
- scopes: ["openid", "profile"]
- user_mapping_provider:
- config:
- localpart_template: "{{ user.given_name|lower }}"
- display_name_template: "{{ user.name }}"
+ oidc_providers:
+ - idp_id: google
+ idp_name: Google
+ issuer: "https://accounts.google.com/"
+ client_id: "your-client-id" # TO BE FILLED
+ client_secret: "your-client-secret" # TO BE FILLED
+ scopes: ["openid", "profile"]
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.given_name|lower }}"
+ display_name_template: "{{ user.name }}"
```
4. Back in the Google console, add this Authorized redirect URI: `[synapse
public baseurl]/_synapse/oidc/callback`.
@@ -266,16 +271,17 @@ oidc_config:
Synapse config:
```yaml
-oidc_config:
- enabled: true
- issuer: "https://id.twitch.tv/oauth2/"
- client_id: "your-client-id" # TO BE FILLED
- client_secret: "your-client-secret" # TO BE FILLED
- client_auth_method: "client_secret_post"
- user_mapping_provider:
- config:
- localpart_template: "{{ user.preferred_username }}"
- display_name_template: "{{ user.name }}"
+oidc_providers:
+ - idp_id: twitch
+ idp_name: Twitch
+ issuer: "https://id.twitch.tv/oauth2/"
+ client_id: "your-client-id" # TO BE FILLED
+ client_secret: "your-client-secret" # TO BE FILLED
+ client_auth_method: "client_secret_post"
+ user_mapping_provider:
+ config:
+ localpart_template: "{{ user.preferred_username }}"
+ display_name_template: "{{ user.name }}"
```
### GitLab
@@ -287,16 +293,17 @@ oidc_config:
Synapse config:
```yaml
-oidc_config:
- enabled: true
- issuer: "https://gitlab.com/"
- client_id: "your-client-id" # TO BE FILLED
- client_secret: "your-client-secret" # TO BE FILLED
- client_auth_method: "client_secret_post"
- scopes: ["openid", "read_user"]
- user_profile_method: "userinfo_endpoint"
- user_mapping_provider:
- config:
- localpart_template: '{{ user.nickname }}'
- display_name_template: '{{ user.name }}'
+oidc_providers:
+ - idp_id: gitlab
+    idp_name: GitLab
+ issuer: "https://gitlab.com/"
+ client_id: "your-client-id" # TO BE FILLED
+ client_secret: "your-client-secret" # TO BE FILLED
+ client_auth_method: "client_secret_post"
+ scopes: ["openid", "read_user"]
+ user_profile_method: "userinfo_endpoint"
+ user_mapping_provider:
+ config:
+ localpart_template: '{{ user.nickname }}'
+ display_name_template: '{{ user.name }}'
```
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 9da351f9f3..ae995efe9b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1709,141 +1709,149 @@ saml2_config:
#idp_entityid: 'https://our_idp/entityid'
-# Enable OpenID Connect (OIDC) / OAuth 2.0 for registration and login.
+# List of OpenID Connect (OIDC) / OAuth 2.0 identity providers, for registration
+# and login.
#
-# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
-# for some example configurations.
+# Options for each entry include:
#
-oidc_config:
- # Uncomment the following to enable authorization against an OpenID Connect
- # server. Defaults to false.
- #
- #enabled: true
-
- # Uncomment the following to disable use of the OIDC discovery mechanism to
- # discover endpoints. Defaults to true.
- #
- #discover: false
-
- # the OIDC issuer. Used to validate tokens and (if discovery is enabled) to
- # discover the provider's endpoints.
- #
- # Required if 'enabled' is true.
- #
- #issuer: "https://accounts.example.com/"
-
- # oauth2 client id to use.
- #
- # Required if 'enabled' is true.
- #
- #client_id: "provided-by-your-issuer"
-
- # oauth2 client secret to use.
- #
- # Required if 'enabled' is true.
- #
- #client_secret: "provided-by-your-issuer"
-
- # auth method to use when exchanging the token.
- # Valid values are 'client_secret_basic' (default), 'client_secret_post' and
- # 'none'.
- #
- #client_auth_method: client_secret_post
-
- # list of scopes to request. This should normally include the "openid" scope.
- # Defaults to ["openid"].
- #
- #scopes: ["openid", "profile"]
-
- # the oauth2 authorization endpoint. Required if provider discovery is disabled.
- #
- #authorization_endpoint: "https://accounts.example.com/oauth2/auth"
-
- # the oauth2 token endpoint. Required if provider discovery is disabled.
- #
- #token_endpoint: "https://accounts.example.com/oauth2/token"
-
- # the OIDC userinfo endpoint. Required if discovery is disabled and the
- # "openid" scope is not requested.
- #
- #userinfo_endpoint: "https://accounts.example.com/userinfo"
-
- # URI where to fetch the JWKS. Required if discovery is disabled and the
- # "openid" scope is used.
- #
- #jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
-
- # Uncomment to skip metadata verification. Defaults to false.
- #
- # Use this if you are connecting to a provider that is not OpenID Connect
- # compliant.
- # Avoid this in production.
- #
- #skip_verification: true
-
- # Whether to fetch the user profile from the userinfo endpoint. Valid
- # values are: "auto" or "userinfo_endpoint".
- #
- # Defaults to "auto", which fetches the userinfo endpoint if "openid" is included
- # in `scopes`. Uncomment the following to always fetch the userinfo endpoint.
- #
- #user_profile_method: "userinfo_endpoint"
-
- # Uncomment to allow a user logging in via OIDC to match a pre-existing account instead
- # of failing. This could be used if switching from password logins to OIDC. Defaults to false.
- #
- #allow_existing_users: true
-
- # An external module can be provided here as a custom solution to mapping
- # attributes returned from a OIDC provider onto a matrix user.
- #
- user_mapping_provider:
- # The custom module's class. Uncomment to use a custom module.
- # Default is 'synapse.handlers.oidc_handler.JinjaOidcMappingProvider'.
- #
- # See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
- # for information on implementing a custom mapping provider.
- #
- #module: mapping_provider.OidcMappingProvider
-
- # Custom configuration values for the module. This section will be passed as
- # a Python dictionary to the user mapping provider module's `parse_config`
- # method.
- #
- # The examples below are intended for the default provider: they should be
- # changed if using a custom provider.
- #
- config:
- # name of the claim containing a unique identifier for the user.
- # Defaults to `sub`, which OpenID Connect compliant providers should provide.
- #
- #subject_claim: "sub"
-
- # Jinja2 template for the localpart of the MXID.
- #
- # When rendering, this template is given the following variables:
- # * user: The claims returned by the UserInfo Endpoint and/or in the ID
- # Token
- #
- # If this is not set, the user will be prompted to choose their
- # own username.
- #
- #localpart_template: "{{ user.preferred_username }}"
-
- # Jinja2 template for the display name to set on first login.
- #
- # If unset, no displayname will be set.
- #
- #display_name_template: "{{ user.given_name }} {{ user.last_name }}"
-
- # Jinja2 templates for extra attributes to send back to the client during
- # login.
- #
- # Note that these are non-standard and clients will ignore them without modifications.
- #
- #extra_attributes:
- #birthdate: "{{ user.birthdate }}"
-
+# idp_id: a unique identifier for this identity provider. Used internally
+# by Synapse; should be a single word such as 'github'.
+#
+# Note that, if this is changed, users authenticating via that provider
+# will no longer be recognised as the same user!
+#
+# idp_name: A user-facing name for this identity provider, which is used to
+# offer the user a choice of login mechanisms.
+#
+# discover: set to 'false' to disable the use of the OIDC discovery mechanism
+# to discover endpoints. Defaults to true.
+#
+# issuer: Required. The OIDC issuer. Used to validate tokens and (if discovery
+# is enabled) to discover the provider's endpoints.
+#
+# client_id: Required. oauth2 client id to use.
+#
+# client_secret: Required. oauth2 client secret to use.
+#
+# client_auth_method: auth method to use when exchanging the token. Valid
+# values are 'client_secret_basic' (default), 'client_secret_post' and
+# 'none'.
+#
+# scopes: list of scopes to request. This should normally include the "openid"
+# scope. Defaults to ["openid"].
+#
+# authorization_endpoint: the oauth2 authorization endpoint. Required if
+# provider discovery is disabled.
+#
+# token_endpoint: the oauth2 token endpoint. Required if provider discovery is
+# disabled.
+#
+# userinfo_endpoint: the OIDC userinfo endpoint. Required if discovery is
+# disabled and the 'openid' scope is not requested.
+#
+# jwks_uri: URI where to fetch the JWKS. Required if discovery is disabled and
+# the 'openid' scope is used.
+#
+# skip_verification: set to 'true' to skip metadata verification. Use this if
+# you are connecting to a provider that is not OpenID Connect compliant.
+# Defaults to false. Avoid this in production.
+#
+# user_profile_method: Whether to fetch the user profile from the userinfo
+# endpoint. Valid values are: 'auto' or 'userinfo_endpoint'.
+#
+# Defaults to 'auto', which fetches the userinfo endpoint if 'openid' is
+# included in 'scopes'. Set to 'userinfo_endpoint' to always fetch the
+# userinfo endpoint.
+#
+# allow_existing_users: set to 'true' to allow a user logging in via OIDC to
+# match a pre-existing account instead of failing. This could be used if
+# switching from password logins to OIDC. Defaults to false.
+#
+# user_mapping_provider: Configuration for how attributes returned from an OIDC
+# provider are mapped onto a matrix user. This setting has the following
+# sub-properties:
+#
+# module: The class name of a custom mapping module. Default is
+# 'synapse.handlers.oidc_handler.JinjaOidcMappingProvider'.
+# See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
+# for information on implementing a custom mapping provider.
+#
+# config: Configuration for the mapping provider module. This section will
+# be passed as a Python dictionary to the user mapping provider
+# module's `parse_config` method.
+#
+# For the default provider, the following settings are available:
+#
+#          subject_claim: name of the claim containing a unique identifier
+#              for the user. Defaults to 'sub', which OpenID Connect
+#              compliant providers should provide.
+#
+# localpart_template: Jinja2 template for the localpart of the MXID.
+# If this is not set, the user will be prompted to choose their
+# own username.
+#
+# display_name_template: Jinja2 template for the display name to set
+# on first login. If unset, no displayname will be set.
+#
+# extra_attributes: a map of Jinja2 templates for extra attributes
+# to send back to the client during login.
+# Note that these are non-standard and clients will ignore them
+# without modifications.
+#
+# When rendering, the Jinja2 templates are given a 'user' variable,
+# which is set to the claims returned by the UserInfo Endpoint and/or
+# in the ID Token.
+#
+# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
+# for information on how to configure these options.
+#
+# For backwards compatibility, it is also possible to configure a single OIDC
+# provider via an 'oidc_config' setting. This is now deprecated and admins are
+# advised to migrate to the 'oidc_providers' format.
+#
+oidc_providers:
+ # Generic example
+ #
+ #- idp_id: my_idp
+ # idp_name: "My OpenID provider"
+ # discover: false
+ # issuer: "https://accounts.example.com/"
+ # client_id: "provided-by-your-issuer"
+ # client_secret: "provided-by-your-issuer"
+ # client_auth_method: client_secret_post
+ # scopes: ["openid", "profile"]
+ # authorization_endpoint: "https://accounts.example.com/oauth2/auth"
+ # token_endpoint: "https://accounts.example.com/oauth2/token"
+ # userinfo_endpoint: "https://accounts.example.com/userinfo"
+ # jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
+ # skip_verification: true
+
+ # For use with Keycloak
+ #
+ #- idp_id: keycloak
+ # idp_name: Keycloak
+ # issuer: "https://127.0.0.1:8443/auth/realms/my_realm_name"
+ # client_id: "synapse"
+ # client_secret: "copy secret generated in Keycloak UI"
+ # scopes: ["openid", "profile"]
+
+  # For use with GitHub
+  #
+  #- idp_id: github
+  #  idp_name: GitHub
+ # discover: false
+ # issuer: "https://github.com/"
+ # client_id: "your-client-id" # TO BE FILLED
+ # client_secret: "your-client-secret" # TO BE FILLED
+ # authorization_endpoint: "https://github.com/login/oauth/authorize"
+ # token_endpoint: "https://github.com/login/oauth/access_token"
+ # userinfo_endpoint: "https://api.github.com/user"
+ # scopes: ["read:user"]
+ # user_mapping_provider:
+ # config:
+ # subject_claim: "id"
+ # localpart_template: "{ user.login }"
+ # display_name_template: "{ user.name }"
# Enable Central Authentication Service (CAS) for registration and login.
diff --git a/docs/workers.md b/docs/workers.md
index 7fb651bba4..cc5090f224 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -16,6 +16,9 @@ workers only work with PostgreSQL-based Synapse deployments. SQLite should only
be used for demo purposes and any admin considering workers should already be
running PostgreSQL.
+See also https://matrix.org/blog/2020/11/03/how-we-fixed-synapses-scalability
+for a higher-level overview.
+
## Main process/worker communication
The processes communicate with each other via a Synapse-specific protocol called
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index 2f97e6d258..c7877b4095 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -40,7 +40,7 @@ class CasConfig(Config):
self.cas_required_attributes = {}
def generate_config_section(self, config_dir_path, server_name, **kwargs):
- return """
+ return """\
# Enable Central Authentication Service (CAS) for registration and login.
#
cas_config:
diff --git a/synapse/config/oidc_config.py b/synapse/config/oidc_config.py
index fddca19223..c7fa749377 100644
--- a/synapse/config/oidc_config.py
+++ b/synapse/config/oidc_config.py
@@ -15,7 +15,7 @@
# limitations under the License.
import string
-from typing import Optional, Type
+from typing import Iterable, Optional, Type
import attr
@@ -33,16 +33,8 @@ class OIDCConfig(Config):
section = "oidc"
def read_config(self, config, **kwargs):
- validate_config(MAIN_CONFIG_SCHEMA, config, ())
-
- self.oidc_provider = None # type: Optional[OidcProviderConfig]
-
- oidc_config = config.get("oidc_config")
- if oidc_config and oidc_config.get("enabled", False):
- validate_config(OIDC_PROVIDER_CONFIG_SCHEMA, oidc_config, ("oidc_config",))
- self.oidc_provider = _parse_oidc_config_dict(oidc_config)
-
- if not self.oidc_provider:
+ self.oidc_providers = tuple(_parse_oidc_provider_configs(config))
+ if not self.oidc_providers:
return
try:
@@ -58,144 +50,153 @@ class OIDCConfig(Config):
@property
def oidc_enabled(self) -> bool:
# OIDC is enabled if we have a provider
- return bool(self.oidc_provider)
+ return bool(self.oidc_providers)
def generate_config_section(self, config_dir_path, server_name, **kwargs):
return """\
- # Enable OpenID Connect (OIDC) / OAuth 2.0 for registration and login.
+ # List of OpenID Connect (OIDC) / OAuth 2.0 identity providers, for registration
+ # and login.
+ #
+ # Options for each entry include:
+ #
+ # idp_id: a unique identifier for this identity provider. Used internally
+ # by Synapse; should be a single word such as 'github'.
+ #
+ # Note that, if this is changed, users authenticating via that provider
+ # will no longer be recognised as the same user!
+ #
+ # idp_name: A user-facing name for this identity provider, which is used to
+ # offer the user a choice of login mechanisms.
+ #
+ # discover: set to 'false' to disable the use of the OIDC discovery mechanism
+ # to discover endpoints. Defaults to true.
+ #
+ # issuer: Required. The OIDC issuer. Used to validate tokens and (if discovery
+ # is enabled) to discover the provider's endpoints.
+ #
+ # client_id: Required. oauth2 client id to use.
+ #
+ # client_secret: Required. oauth2 client secret to use.
+ #
+ # client_auth_method: auth method to use when exchanging the token. Valid
+ # values are 'client_secret_basic' (default), 'client_secret_post' and
+ # 'none'.
+ #
+ # scopes: list of scopes to request. This should normally include the "openid"
+ # scope. Defaults to ["openid"].
+ #
+ # authorization_endpoint: the oauth2 authorization endpoint. Required if
+ # provider discovery is disabled.
+ #
+ # token_endpoint: the oauth2 token endpoint. Required if provider discovery is
+ # disabled.
+ #
+ # userinfo_endpoint: the OIDC userinfo endpoint. Required if discovery is
+ # disabled and the 'openid' scope is not requested.
+ #
+ # jwks_uri: URI where to fetch the JWKS. Required if discovery is disabled and
+ # the 'openid' scope is used.
+ #
+ # skip_verification: set to 'true' to skip metadata verification. Use this if
+ # you are connecting to a provider that is not OpenID Connect compliant.
+ # Defaults to false. Avoid this in production.
+ #
+ # user_profile_method: Whether to fetch the user profile from the userinfo
+ # endpoint. Valid values are: 'auto' or 'userinfo_endpoint'.
+ #
+ # Defaults to 'auto', which fetches the userinfo endpoint if 'openid' is
+ # included in 'scopes'. Set to 'userinfo_endpoint' to always fetch the
+ # userinfo endpoint.
+ #
+ # allow_existing_users: set to 'true' to allow a user logging in via OIDC to
+ # match a pre-existing account instead of failing. This could be used if
+ # switching from password logins to OIDC. Defaults to false.
+ #
+    # user_mapping_provider: Configuration for how attributes returned from an OIDC
+ # provider are mapped onto a matrix user. This setting has the following
+ # sub-properties:
+ #
+ # module: The class name of a custom mapping module. Default is
+ # {mapping_provider!r}.
+ # See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
+ # for information on implementing a custom mapping provider.
+ #
+ # config: Configuration for the mapping provider module. This section will
+ # be passed as a Python dictionary to the user mapping provider
+ # module's `parse_config` method.
+ #
+ # For the default provider, the following settings are available:
+ #
+        #       subject_claim: name of the claim containing a unique identifier
+        #           for the user. Defaults to 'sub', which OpenID Connect
+        #           compliant providers should provide.
+ #
+ # localpart_template: Jinja2 template for the localpart of the MXID.
+ # If this is not set, the user will be prompted to choose their
+ # own username.
+ #
+ # display_name_template: Jinja2 template for the display name to set
+ # on first login. If unset, no displayname will be set.
+ #
+ # extra_attributes: a map of Jinja2 templates for extra attributes
+ # to send back to the client during login.
+ # Note that these are non-standard and clients will ignore them
+ # without modifications.
+ #
+ # When rendering, the Jinja2 templates are given a 'user' variable,
+ # which is set to the claims returned by the UserInfo Endpoint and/or
+ # in the ID Token.
#
# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
- # for some example configurations.
+ # for information on how to configure these options.
#
- oidc_config:
- # Uncomment the following to enable authorization against an OpenID Connect
- # server. Defaults to false.
- #
- #enabled: true
-
- # Uncomment the following to disable use of the OIDC discovery mechanism to
- # discover endpoints. Defaults to true.
- #
- #discover: false
-
- # the OIDC issuer. Used to validate tokens and (if discovery is enabled) to
- # discover the provider's endpoints.
- #
- # Required if 'enabled' is true.
- #
- #issuer: "https://accounts.example.com/"
-
- # oauth2 client id to use.
- #
- # Required if 'enabled' is true.
- #
- #client_id: "provided-by-your-issuer"
-
- # oauth2 client secret to use.
- #
- # Required if 'enabled' is true.
- #
- #client_secret: "provided-by-your-issuer"
-
- # auth method to use when exchanging the token.
- # Valid values are 'client_secret_basic' (default), 'client_secret_post' and
- # 'none'.
- #
- #client_auth_method: client_secret_post
-
- # list of scopes to request. This should normally include the "openid" scope.
- # Defaults to ["openid"].
- #
- #scopes: ["openid", "profile"]
-
- # the oauth2 authorization endpoint. Required if provider discovery is disabled.
- #
- #authorization_endpoint: "https://accounts.example.com/oauth2/auth"
-
- # the oauth2 token endpoint. Required if provider discovery is disabled.
- #
- #token_endpoint: "https://accounts.example.com/oauth2/token"
-
- # the OIDC userinfo endpoint. Required if discovery is disabled and the
- # "openid" scope is not requested.
- #
- #userinfo_endpoint: "https://accounts.example.com/userinfo"
-
- # URI where to fetch the JWKS. Required if discovery is disabled and the
- # "openid" scope is used.
- #
- #jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
-
- # Uncomment to skip metadata verification. Defaults to false.
- #
- # Use this if you are connecting to a provider that is not OpenID Connect
- # compliant.
- # Avoid this in production.
- #
- #skip_verification: true
-
- # Whether to fetch the user profile from the userinfo endpoint. Valid
- # values are: "auto" or "userinfo_endpoint".
+ # For backwards compatibility, it is also possible to configure a single OIDC
+ # provider via an 'oidc_config' setting. This is now deprecated and admins are
+ # advised to migrate to the 'oidc_providers' format.
+ #
+ oidc_providers:
+ # Generic example
#
- # Defaults to "auto", which fetches the userinfo endpoint if "openid" is included
- # in `scopes`. Uncomment the following to always fetch the userinfo endpoint.
+ #- idp_id: my_idp
+ # idp_name: "My OpenID provider"
+ # discover: false
+ # issuer: "https://accounts.example.com/"
+ # client_id: "provided-by-your-issuer"
+ # client_secret: "provided-by-your-issuer"
+ # client_auth_method: client_secret_post
+ # scopes: ["openid", "profile"]
+ # authorization_endpoint: "https://accounts.example.com/oauth2/auth"
+ # token_endpoint: "https://accounts.example.com/oauth2/token"
+ # userinfo_endpoint: "https://accounts.example.com/userinfo"
+ # jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
+ # skip_verification: true
+
+ # For use with Keycloak
#
- #user_profile_method: "userinfo_endpoint"
-
- # Uncomment to allow a user logging in via OIDC to match a pre-existing account instead
- # of failing. This could be used if switching from password logins to OIDC. Defaults to false.
- #
- #allow_existing_users: true
-
- # An external module can be provided here as a custom solution to mapping
- # attributes returned from a OIDC provider onto a matrix user.
+ #- idp_id: keycloak
+ # idp_name: Keycloak
+ # issuer: "https://127.0.0.1:8443/auth/realms/my_realm_name"
+ # client_id: "synapse"
+ # client_secret: "copy secret generated in Keycloak UI"
+ # scopes: ["openid", "profile"]
+
+    # For use with GitHub
#
- user_mapping_provider:
- # The custom module's class. Uncomment to use a custom module.
- # Default is {mapping_provider!r}.
- #
- # See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
- # for information on implementing a custom mapping provider.
- #
- #module: mapping_provider.OidcMappingProvider
-
- # Custom configuration values for the module. This section will be passed as
- # a Python dictionary to the user mapping provider module's `parse_config`
- # method.
- #
- # The examples below are intended for the default provider: they should be
- # changed if using a custom provider.
- #
- config:
- # name of the claim containing a unique identifier for the user.
- # Defaults to `sub`, which OpenID Connect compliant providers should provide.
- #
- #subject_claim: "sub"
-
- # Jinja2 template for the localpart of the MXID.
- #
- # When rendering, this template is given the following variables:
- # * user: The claims returned by the UserInfo Endpoint and/or in the ID
- # Token
- #
- # If this is not set, the user will be prompted to choose their
- # own username.
- #
- #localpart_template: "{{{{ user.preferred_username }}}}"
-
- # Jinja2 template for the display name to set on first login.
- #
- # If unset, no displayname will be set.
- #
- #display_name_template: "{{{{ user.given_name }}}} {{{{ user.last_name }}}}"
-
- # Jinja2 templates for extra attributes to send back to the client during
- # login.
- #
- # Note that these are non-standard and clients will ignore them without modifications.
- #
- #extra_attributes:
- #birthdate: "{{{{ user.birthdate }}}}"
+    #- idp_id: github
+    #  idp_name: GitHub
+ # discover: false
+ # issuer: "https://github.com/"
+ # client_id: "your-client-id" # TO BE FILLED
+ # client_secret: "your-client-secret" # TO BE FILLED
+ # authorization_endpoint: "https://github.com/login/oauth/authorize"
+ # token_endpoint: "https://github.com/login/oauth/access_token"
+ # userinfo_endpoint: "https://api.github.com/user"
+ # scopes: ["read:user"]
+ # user_mapping_provider:
+ # config:
+ # subject_claim: "id"
+ # localpart_template: "{{ user.login }}"
+ # display_name_template: "{{ user.name }}"
""".format(
mapping_provider=DEFAULT_USER_MAPPING_PROVIDER
)
@@ -234,7 +235,22 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
},
}
-# the `oidc_config` setting can either be None (as it is in the default
+# the same as OIDC_PROVIDER_CONFIG_SCHEMA, but with compulsory idp_id and idp_name
+OIDC_PROVIDER_CONFIG_WITH_ID_SCHEMA = {
+ "allOf": [OIDC_PROVIDER_CONFIG_SCHEMA, {"required": ["idp_id", "idp_name"]}]
+}
+
+
+# the `oidc_providers` list can either be None (as it is in the default config), or
+# a list of provider configs, each of which requires an explicit ID and name.
+OIDC_PROVIDER_LIST_SCHEMA = {
+ "oneOf": [
+ {"type": "null"},
+ {"type": "array", "items": OIDC_PROVIDER_CONFIG_WITH_ID_SCHEMA},
+ ]
+}
+
+# the `oidc_config` setting can either be None (which it used to be in the default
# config), or an object. If an object, it is ignored unless it has an "enabled: True"
# property.
#
@@ -243,12 +259,41 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
# additional checks in the code.
OIDC_CONFIG_SCHEMA = {"oneOf": [{"type": "null"}, {"type": "object"}]}
+# the top-level schema can contain an "oidc_config" and/or an "oidc_providers".
MAIN_CONFIG_SCHEMA = {
"type": "object",
- "properties": {"oidc_config": OIDC_CONFIG_SCHEMA},
+ "properties": {
+ "oidc_config": OIDC_CONFIG_SCHEMA,
+ "oidc_providers": OIDC_PROVIDER_LIST_SCHEMA,
+ },
}
+def _parse_oidc_provider_configs(config: JsonDict) -> Iterable["OidcProviderConfig"]:
+ """extract and parse the OIDC provider configs from the config dict
+
+ The configuration may contain either a single `oidc_config` object with an
+ `enabled: True` property, or a list of provider configurations under
+ `oidc_providers`, *or both*.
+
+ Returns a generator which yields the OidcProviderConfig objects
+ """
+ validate_config(MAIN_CONFIG_SCHEMA, config, ())
+
+ for p in config.get("oidc_providers") or []:
+ yield _parse_oidc_config_dict(p)
+
+ # for backwards-compatibility, it is also possible to provide a single "oidc_config"
+ # object with an "enabled: True" property.
+ oidc_config = config.get("oidc_config")
+ if oidc_config and oidc_config.get("enabled", False):
+ # MAIN_CONFIG_SCHEMA checks that `oidc_config` is an object, but not that
+ # it matches OIDC_PROVIDER_CONFIG_SCHEMA (see the comments on OIDC_CONFIG_SCHEMA
+ # above), so now we need to validate it.
+ validate_config(OIDC_PROVIDER_CONFIG_SCHEMA, oidc_config, ("oidc_config",))
+ yield _parse_oidc_config_dict(oidc_config)
+
+
def _parse_oidc_config_dict(oidc_config: JsonDict) -> "OidcProviderConfig":
"""Take the configuration dict and parse it into an OidcProviderConfig
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index cc5f75123c..740c3fc1b1 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -14,14 +14,13 @@
# limitations under the License.
import os
-from distutils.util import strtobool
import pkg_resources
from synapse.api.constants import RoomCreationPreset
from synapse.config._base import Config, ConfigError
from synapse.types import RoomAlias, UserID
-from synapse.util.stringutils import random_string_with_symbols
+from synapse.util.stringutils import random_string_with_symbols, strtobool
class AccountValidityConfig(Config):
@@ -86,12 +85,12 @@ class RegistrationConfig(Config):
section = "registration"
def read_config(self, config, **kwargs):
- self.enable_registration = bool(
- strtobool(str(config.get("enable_registration", False)))
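+        # Note: synapse's strtobool returns a bool (unlike distutils', which
+        # returned an int), so the result no longer needs wrapping in bool().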
+ self.enable_registration = strtobool(
+ str(config.get("enable_registration", False))
)
if "disable_registration" in config:
- self.enable_registration = not bool(
- strtobool(str(config["disable_registration"]))
+ self.enable_registration = not strtobool(
+ str(config["disable_registration"])
)
self.account_validity = AccountValidityConfig(
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 8028663fa8..3ec4120f85 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -17,7 +17,6 @@
import abc
import os
-from distutils.util import strtobool
from typing import Dict, Optional, Tuple, Type
from unpaddedbase64 import encode_base64
@@ -26,6 +25,7 @@ from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVers
from synapse.types import JsonDict, RoomStreamToken
from synapse.util.caches import intern_dict
from synapse.util.frozenutils import freeze
+from synapse.util.stringutils import strtobool
# Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents
# bugs where we accidentally share e.g. signature dicts. However, converting a
@@ -34,6 +34,7 @@ from synapse.util.frozenutils import freeze
# NOTE: This is overridden by the configuration by the Synapse worker apps, but
# for the sake of tests, it is set here while it cannot be configured on the
# homeserver object itself.
+
USE_FROZEN_DICTS = strtobool(os.environ.get("SYNAPSE_USE_FROZEN_DICTS", "0"))
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 109dc7932f..37a678b6ce 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -163,7 +163,7 @@ class DeviceMessageHandler:
await self.store.mark_remote_user_device_cache_as_stale(sender_user_id)
# Immediately attempt a resync in the background
- run_in_background(self._user_device_resync, sender_user_id)
+ run_in_background(self._user_device_resync, user_id=sender_user_id)
async def send_device_message(
self,
diff --git a/synapse/handlers/oidc_handler.py b/synapse/handlers/oidc_handler.py
index f63a90ec5c..5e5fda7b2f 100644
--- a/synapse/handlers/oidc_handler.py
+++ b/synapse/handlers/oidc_handler.py
@@ -78,21 +78,28 @@ class OidcHandler:
def __init__(self, hs: "HomeServer"):
self._sso_handler = hs.get_sso_handler()
- provider_conf = hs.config.oidc.oidc_provider
+ provider_confs = hs.config.oidc.oidc_providers
# we should not have been instantiated if there is no configured provider.
- assert provider_conf is not None
+ assert provider_confs
self._token_generator = OidcSessionTokenGenerator(hs)
-
- self._provider = OidcProvider(hs, self._token_generator, provider_conf)
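+        # Map from IdP ID to the OidcProvider instance handling that provider.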
+ self._providers = {
+ p.idp_id: OidcProvider(hs, self._token_generator, p) for p in provider_confs
+ }
async def load_metadata(self) -> None:
"""Validate the config and load the metadata from the remote endpoint.
Called at startup to ensure we have everything we need.
"""
- await self._provider.load_metadata()
- await self._provider.load_jwks()
+ for idp_id, p in self._providers.items():
+ try:
+ await p.load_metadata()
+ await p.load_jwks()
+ except Exception as e:
+ raise Exception(
+ "Error while initialising OIDC provider %r" % (idp_id,)
+ ) from e
async def handle_oidc_callback(self, request: SynapseRequest) -> None:
"""Handle an incoming request to /_synapse/oidc/callback
@@ -184,6 +191,12 @@ class OidcHandler:
self._sso_handler.render_error(request, "mismatching_session", str(e))
return
+ oidc_provider = self._providers.get(session_data.idp_id)
+ if not oidc_provider:
+ logger.error("OIDC session uses unknown IdP %r", oidc_provider)
+ self._sso_handler.render_error(request, "unknown_idp", "Unknown IdP")
+ return
+
if b"code" not in request.args:
logger.info("Code parameter is missing")
self._sso_handler.render_error(
@@ -193,7 +206,7 @@ class OidcHandler:
code = request.args[b"code"][0].decode()
- await self._provider.handle_oidc_callback(request, session_data, code)
+ await oidc_provider.handle_oidc_callback(request, session_data, code)
class OidcError(Exception):
diff --git a/synapse/http/client.py b/synapse/http/client.py
index dc4b81ca60..df498c8645 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -766,14 +766,24 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
self.max_size = max_size
def dataReceived(self, data: bytes) -> None:
+ # If the deferred was called, bail early.
+ if self.deferred.called:
+ return
+
self.stream.write(data)
self.length += len(data)
+ # The first time the maximum size is exceeded, error and cancel the
+ # connection. dataReceived might be called again if data was received
+ # in the meantime.
if self.max_size is not None and self.length >= self.max_size:
self.deferred.errback(BodyExceededMaxSize())
- self.deferred = defer.Deferred()
self.transport.loseConnection()
def connectionLost(self, reason: Failure) -> None:
+ # If the maximum size was already exceeded, there's nothing to do.
+ if self.deferred.called:
+ return
+
if reason.check(ResponseDone):
self.deferred.callback(self.length)
elif reason.check(PotentialDataLoss):
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index c82b4f87d6..8720b1401f 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -15,6 +15,9 @@
# limitations under the License.
import logging
+from typing import TYPE_CHECKING, Tuple
+
+from twisted.web.http import Request
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
@@ -23,6 +26,10 @@ from synapse.rest.admin._base import (
assert_requester_is_admin,
assert_user_is_admin,
)
+from synapse.types import JsonDict
+
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
logger = logging.getLogger(__name__)
@@ -39,11 +46,11 @@ class QuarantineMediaInRoom(RestServlet):
admin_patterns("/quarantine_media/(?P<room_id>[^/]+)")
)
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
- async def on_POST(self, request, room_id: str):
+ async def on_POST(self, request: Request, room_id: str) -> Tuple[int, JsonDict]:
requester = await self.auth.get_user_by_req(request)
await assert_user_is_admin(self.auth, requester.user)
@@ -64,11 +71,11 @@ class QuarantineMediaByUser(RestServlet):
PATTERNS = admin_patterns("/user/(?P<user_id>[^/]+)/media/quarantine")
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
- async def on_POST(self, request, user_id: str):
+ async def on_POST(self, request: Request, user_id: str) -> Tuple[int, JsonDict]:
requester = await self.auth.get_user_by_req(request)
await assert_user_is_admin(self.auth, requester.user)
@@ -91,11 +98,13 @@ class QuarantineMediaByID(RestServlet):
"/media/quarantine/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)"
)
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
- async def on_POST(self, request, server_name: str, media_id: str):
+ async def on_POST(
+ self, request: Request, server_name: str, media_id: str
+ ) -> Tuple[int, JsonDict]:
requester = await self.auth.get_user_by_req(request)
await assert_user_is_admin(self.auth, requester.user)
@@ -109,17 +118,39 @@ class QuarantineMediaByID(RestServlet):
return 200, {}
+class ProtectMediaByID(RestServlet):
+ """Protect local media from being quarantined.
+ """
+
+ PATTERNS = admin_patterns("/media/protect/(?P<media_id>[^/]+)")
+
+ def __init__(self, hs: "HomeServer"):
+ self.store = hs.get_datastore()
+ self.auth = hs.get_auth()
+
+ async def on_POST(self, request: Request, media_id: str) -> Tuple[int, JsonDict]:
+ requester = await self.auth.get_user_by_req(request)
+ await assert_user_is_admin(self.auth, requester.user)
+
+ logging.info("Protecting local media by ID: %s", media_id)
+
+ # Quarantine this media id
+ await self.store.mark_local_media_as_safe(media_id)
+
+ return 200, {}
+
+
class ListMediaInRoom(RestServlet):
"""Lists all of the media in a given room.
"""
PATTERNS = admin_patterns("/room/(?P<room_id>[^/]+)/media")
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
- async def on_GET(self, request, room_id):
+ async def on_GET(self, request: Request, room_id: str) -> Tuple[int, JsonDict]:
requester = await self.auth.get_user_by_req(request)
is_admin = await self.auth.is_server_admin(requester.user)
if not is_admin:
@@ -133,11 +164,11 @@ class ListMediaInRoom(RestServlet):
class PurgeMediaCacheRestServlet(RestServlet):
PATTERNS = admin_patterns("/purge_media_cache")
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.media_repository = hs.get_media_repository()
self.auth = hs.get_auth()
- async def on_POST(self, request):
+ async def on_POST(self, request: Request) -> Tuple[int, JsonDict]:
await assert_requester_is_admin(self.auth, request)
before_ts = parse_integer(request, "before_ts", required=True)
@@ -154,13 +185,15 @@ class DeleteMediaByID(RestServlet):
PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
self.server_name = hs.hostname
self.media_repository = hs.get_media_repository()
- async def on_DELETE(self, request, server_name: str, media_id: str):
+ async def on_DELETE(
+ self, request: Request, server_name: str, media_id: str
+ ) -> Tuple[int, JsonDict]:
await assert_requester_is_admin(self.auth, request)
if self.server_name != server_name:
@@ -182,13 +215,13 @@ class DeleteMediaByDateSize(RestServlet):
PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
self.server_name = hs.hostname
self.media_repository = hs.get_media_repository()
- async def on_POST(self, request, server_name: str):
+ async def on_POST(self, request: Request, server_name: str) -> Tuple[int, JsonDict]:
await assert_requester_is_admin(self.auth, request)
before_ts = parse_integer(request, "before_ts", required=True)
@@ -222,7 +255,7 @@ class DeleteMediaByDateSize(RestServlet):
return 200, {"deleted_media": deleted_media, "total": total}
-def register_servlets_for_media_repo(hs, http_server):
+def register_servlets_for_media_repo(hs: "HomeServer", http_server):
"""
Media repo specific APIs.
"""
@@ -230,6 +263,7 @@ def register_servlets_for_media_repo(hs, http_server):
QuarantineMediaInRoom(hs).register(http_server)
QuarantineMediaByID(hs).register(http_server)
QuarantineMediaByUser(hs).register(http_server)
+ ProtectMediaByID(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
DeleteMediaByDateSize(hs).register(http_server)
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 47c2b44bff..31a41e4a27 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2019 New Vector Ltd
+# Copyright 2019-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,10 +17,11 @@
import logging
import os
import urllib
-from typing import Awaitable
+from typing import Awaitable, Dict, Generator, List, Optional, Tuple
from twisted.internet.interfaces import IConsumer
from twisted.protocols.basic import FileSender
+from twisted.web.http import Request
from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.http.server import finish_request, respond_with_json
@@ -46,7 +47,7 @@ TEXT_CONTENT_TYPES = [
]
-def parse_media_id(request):
+def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
try:
# This allows users to append e.g. /test.png to the URL. Useful for
# clients that parse the URL to see content type.
@@ -69,7 +70,7 @@ def parse_media_id(request):
)
-def respond_404(request):
+def respond_404(request: Request) -> None:
respond_with_json(
request,
404,
@@ -79,8 +80,12 @@ def respond_404(request):
async def respond_with_file(
- request, media_type, file_path, file_size=None, upload_name=None
-):
+ request: Request,
+ media_type: str,
+ file_path: str,
+ file_size: Optional[int] = None,
+ upload_name: Optional[str] = None,
+) -> None:
logger.debug("Responding with %r", file_path)
if os.path.isfile(file_path):
@@ -98,15 +103,20 @@ async def respond_with_file(
respond_404(request)
-def add_file_headers(request, media_type, file_size, upload_name):
+def add_file_headers(
+ request: Request,
+ media_type: str,
+ file_size: Optional[int],
+ upload_name: Optional[str],
+) -> None:
"""Adds the correct response headers in preparation for responding with the
media.
Args:
- request (twisted.web.http.Request)
- media_type (str): The media/content type.
- file_size (int): Size in bytes of the media, if known.
- upload_name (str): The name of the requested file, if any.
+ request
+ media_type: The media/content type.
+ file_size: Size in bytes of the media, if known.
+ upload_name: The name of the requested file, if any.
"""
def _quote(x):
@@ -153,7 +163,8 @@ def add_file_headers(request, media_type, file_size, upload_name):
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
- request.setHeader(b"Content-Length", b"%d" % (file_size,))
+ if file_size is not None:
+ request.setHeader(b"Content-Length", b"%d" % (file_size,))
# Tell web crawlers to not index, archive, or follow links in media. This
# should help to prevent things in the media repo from showing up in web
@@ -184,7 +195,7 @@ _FILENAME_SEPARATOR_CHARS = {
}
-def _can_encode_filename_as_token(x):
+def _can_encode_filename_as_token(x: str) -> bool:
for c in x:
# from RFC2616:
#
@@ -206,17 +217,21 @@ def _can_encode_filename_as_token(x):
async def respond_with_responder(
- request, responder, media_type, file_size, upload_name=None
-):
+ request: Request,
+ responder: "Optional[Responder]",
+ media_type: str,
+ file_size: Optional[int],
+ upload_name: Optional[str] = None,
+) -> None:
"""Responds to the request with given responder. If responder is None then
returns 404.
Args:
- request (twisted.web.http.Request)
- responder (Responder|None)
- media_type (str): The media/content type.
- file_size (int|None): Size in bytes of the media. If not known it should be None
- upload_name (str|None): The name of the requested file, if any.
+ request
+ responder
+ media_type: The media/content type.
+ file_size: Size in bytes of the media. If not known it should be None
+ upload_name: The name of the requested file, if any.
"""
if request._disconnected:
logger.warning(
@@ -308,22 +323,22 @@ class FileInfo:
self.thumbnail_type = thumbnail_type
-def get_filename_from_headers(headers):
+def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]:
"""
Get the filename of the downloaded file by inspecting the
Content-Disposition HTTP header.
Args:
- headers (dict[bytes, list[bytes]]): The HTTP request headers.
+ headers: The HTTP request headers.
Returns:
- A Unicode string of the filename, or None.
+ The filename, or None.
"""
content_disposition = headers.get(b"Content-Disposition", [b""])
# No header, bail out.
if not content_disposition[0]:
- return
+ return None
_, params = _parse_header(content_disposition[0])
@@ -356,17 +371,16 @@ def get_filename_from_headers(headers):
return upload_name
-def _parse_header(line):
+def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]:
"""Parse a Content-type like header.
Cargo-culted from `cgi`, but works on bytes rather than strings.
Args:
- line (bytes): header to be parsed
+ line: header to be parsed
Returns:
- Tuple[bytes, dict[bytes, bytes]]:
- the main content-type, followed by the parameter dictionary
+ The main content-type, followed by the parameter dictionary
"""
parts = _parseparam(b";" + line)
key = next(parts)
@@ -386,16 +400,16 @@ def _parse_header(line):
return key, pdict
-def _parseparam(s):
+def _parseparam(s: bytes) -> Generator[bytes, None, None]:
"""Generator which splits the input on ;, respecting double-quoted sequences
Cargo-culted from `cgi`, but works on bytes rather than strings.
Args:
- s (bytes): header to be parsed
+ s: header to be parsed
Returns:
- Iterable[bytes]: the split input
+ The split input
"""
while s[:1] == b";":
s = s[1:]
diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py
index 68dd2a1c8a..4e4c6971f7 100644
--- a/synapse/rest/media/v1/config_resource.py
+++ b/synapse/rest/media/v1/config_resource.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Will Hunt <will@half-shot.uk>
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,22 +15,29 @@
# limitations under the License.
#
+from typing import TYPE_CHECKING
+
+from twisted.web.http import Request
+
from synapse.http.server import DirectServeJsonResource, respond_with_json
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
+
class MediaConfigResource(DirectServeJsonResource):
isLeaf = True
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
super().__init__()
config = hs.get_config()
self.clock = hs.get_clock()
self.auth = hs.get_auth()
self.limits_dict = {"m.upload.size": config.max_upload_size}
- async def _async_render_GET(self, request):
+ async def _async_render_GET(self, request: Request) -> None:
await self.auth.get_user_by_req(request)
respond_with_json(request, 200, self.limits_dict, send_cors=True)
- async def _async_render_OPTIONS(self, request):
+ async def _async_render_OPTIONS(self, request: Request) -> None:
respond_with_json(request, 200, {}, send_cors=True)
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
index d3d8457303..3ed219ae43 100644
--- a/synapse/rest/media/v1/download_resource.py
+++ b/synapse/rest/media/v1/download_resource.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,24 +14,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
+from typing import TYPE_CHECKING
+
+from twisted.web.http import Request
-import synapse.http.servlet
from synapse.http.server import DirectServeJsonResource, set_cors_headers
+from synapse.http.servlet import parse_boolean
from ._base import parse_media_id, respond_404
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
+ from synapse.rest.media.v1.media_repository import MediaRepository
+
logger = logging.getLogger(__name__)
class DownloadResource(DirectServeJsonResource):
isLeaf = True
- def __init__(self, hs, media_repo):
+ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
super().__init__()
self.media_repo = media_repo
self.server_name = hs.hostname
- async def _async_render_GET(self, request):
+ async def _async_render_GET(self, request: Request) -> None:
set_cors_headers(request)
request.setHeader(
b"Content-Security-Policy",
@@ -49,9 +57,7 @@ class DownloadResource(DirectServeJsonResource):
if server_name == self.server_name:
await self.media_repo.get_local_media(request, media_id, name)
else:
- allow_remote = synapse.http.servlet.parse_boolean(
- request, "allow_remote", default=True
- )
+ allow_remote = parse_boolean(request, "allow_remote", default=True)
if not allow_remote:
logger.info(
"Rejecting request for remote media %s/%s due to allow_remote",
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index 9e079f672f..7792f26e78 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,11 +17,12 @@
import functools
import os
import re
+from typing import Callable, List
NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
-def _wrap_in_base_path(func):
+def _wrap_in_base_path(func: "Callable[..., str]") -> "Callable[..., str]":
"""Takes a function that returns a relative path and turns it into an
absolute path based on the location of the primary media store
"""
@@ -41,12 +43,18 @@ class MediaFilePaths:
to write to the backup media store (when one is configured)
"""
- def __init__(self, primary_base_path):
+ def __init__(self, primary_base_path: str):
self.base_path = primary_base_path
def default_thumbnail_rel(
- self, default_top_level, default_sub_type, width, height, content_type, method
- ):
+ self,
+ default_top_level: str,
+ default_sub_type: str,
+ width: int,
+ height: int,
+ content_type: str,
+ method: str,
+ ) -> str:
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
return os.path.join(
@@ -55,12 +63,14 @@ class MediaFilePaths:
default_thumbnail = _wrap_in_base_path(default_thumbnail_rel)
- def local_media_filepath_rel(self, media_id):
+ def local_media_filepath_rel(self, media_id: str) -> str:
return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:])
local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
- def local_media_thumbnail_rel(self, media_id, width, height, content_type, method):
+ def local_media_thumbnail_rel(
+ self, media_id: str, width: int, height: int, content_type: str, method: str
+ ) -> str:
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
return os.path.join(
@@ -86,7 +96,7 @@ class MediaFilePaths:
media_id[4:],
)
- def remote_media_filepath_rel(self, server_name, file_id):
+ def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:
return os.path.join(
"remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]
)
@@ -94,8 +104,14 @@ class MediaFilePaths:
remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
def remote_media_thumbnail_rel(
- self, server_name, file_id, width, height, content_type, method
- ):
+ self,
+ server_name: str,
+ file_id: str,
+ width: int,
+ height: int,
+ content_type: str,
+ method: str,
+ ) -> str:
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
return os.path.join(
@@ -113,7 +129,7 @@ class MediaFilePaths:
# Should be removed after some time, when most of the thumbnails are stored
# using the new path.
def remote_media_thumbnail_rel_legacy(
- self, server_name, file_id, width, height, content_type
+ self, server_name: str, file_id: str, width: int, height: int, content_type: str
):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
@@ -126,7 +142,7 @@ class MediaFilePaths:
file_name,
)
- def remote_media_thumbnail_dir(self, server_name, file_id):
+ def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
return os.path.join(
self.base_path,
"remote_thumbnail",
@@ -136,7 +152,7 @@ class MediaFilePaths:
file_id[4:],
)
- def url_cache_filepath_rel(self, media_id):
+ def url_cache_filepath_rel(self, media_id: str) -> str:
if NEW_FORMAT_ID_RE.match(media_id):
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
@@ -146,7 +162,7 @@ class MediaFilePaths:
url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
- def url_cache_filepath_dirs_to_delete(self, media_id):
+ def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
"The dirs to try and remove if we delete the media_id file"
if NEW_FORMAT_ID_RE.match(media_id):
return [os.path.join(self.base_path, "url_cache", media_id[:10])]
@@ -156,7 +172,9 @@ class MediaFilePaths:
os.path.join(self.base_path, "url_cache", media_id[0:2]),
]
- def url_cache_thumbnail_rel(self, media_id, width, height, content_type, method):
+ def url_cache_thumbnail_rel(
+ self, media_id: str, width: int, height: int, content_type: str, method: str
+ ) -> str:
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
@@ -178,7 +196,7 @@ class MediaFilePaths:
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
- def url_cache_thumbnail_directory(self, media_id):
+ def url_cache_thumbnail_directory(self, media_id: str) -> str:
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
@@ -195,7 +213,7 @@ class MediaFilePaths:
media_id[4:],
)
- def url_cache_thumbnail_dirs_to_delete(self, media_id):
+ def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
"The dirs to try and remove if we delete the media_id thumbnails"
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
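The newly annotated path helpers all follow the same sharding scheme: the first four characters of a media id become two levels of directories. A quick sketch (the base path and media id below are made up):

```python
from synapse.rest.media.v1.filepath import MediaFilePaths

paths = MediaFilePaths("/var/lib/synapse/media")  # hypothetical store location

# "abcdefgh" shards to ab/cd/efgh under local_content.
print(paths.local_media_filepath("abcdefgh"))
# -> /var/lib/synapse/media/local_content/ab/cd/efgh
```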
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 83beb02b05..4c9946a616 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,12 +13,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
import errno
import logging
import os
import shutil
-from typing import IO, Dict, List, Optional, Tuple
+from io import BytesIO
+from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple
import twisted.internet.error
import twisted.web.http
@@ -56,6 +56,9 @@ from .thumbnail_resource import ThumbnailResource
from .thumbnailer import Thumbnailer, ThumbnailError
from .upload_resource import UploadResource
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
+
logger = logging.getLogger(__name__)
@@ -63,7 +66,7 @@ UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
class MediaRepository:
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
self.hs = hs
self.auth = hs.get_auth()
self.client = hs.get_federation_http_client()
@@ -73,16 +76,16 @@ class MediaRepository:
self.max_upload_size = hs.config.max_upload_size
self.max_image_pixels = hs.config.max_image_pixels
- self.primary_base_path = hs.config.media_store_path
- self.filepaths = MediaFilePaths(self.primary_base_path)
+ self.primary_base_path = hs.config.media_store_path # type: str
+ self.filepaths = MediaFilePaths(self.primary_base_path) # type: MediaFilePaths
self.dynamic_thumbnails = hs.config.dynamic_thumbnails
self.thumbnail_requirements = hs.config.thumbnail_requirements
self.remote_media_linearizer = Linearizer(name="media_remote")
- self.recently_accessed_remotes = set()
- self.recently_accessed_locals = set()
+ self.recently_accessed_remotes = set() # type: Set[Tuple[str, str]]
+ self.recently_accessed_locals = set() # type: Set[str]
self.federation_domain_whitelist = hs.config.federation_domain_whitelist
@@ -113,7 +116,7 @@ class MediaRepository:
"update_recently_accessed_media", self._update_recently_accessed
)
- async def _update_recently_accessed(self):
+ async def _update_recently_accessed(self) -> None:
remote_media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
@@ -124,12 +127,12 @@ class MediaRepository:
local_media, remote_media, self.clock.time_msec()
)
- def mark_recently_accessed(self, server_name, media_id):
+ def mark_recently_accessed(self, server_name: Optional[str], media_id: str) -> None:
"""Mark the given media as recently accessed.
Args:
- server_name (str|None): Origin server of media, or None if local
- media_id (str): The media ID of the content
+ server_name: Origin server of media, or None if local
+ media_id: The media ID of the content
"""
if server_name:
self.recently_accessed_remotes.add((server_name, media_id))
@@ -459,7 +462,14 @@ class MediaRepository:
def _get_thumbnail_requirements(self, media_type):
return self.thumbnail_requirements.get(media_type, ())
- def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type):
+ def _generate_thumbnail(
+ self,
+ thumbnailer: Thumbnailer,
+ t_width: int,
+ t_height: int,
+ t_method: str,
+ t_type: str,
+ ) -> Optional[BytesIO]:
m_width = thumbnailer.width
m_height = thumbnailer.height
@@ -470,22 +480,20 @@ class MediaRepository:
m_height,
self.max_image_pixels,
)
- return
+ return None
if thumbnailer.transpose_method is not None:
m_width, m_height = thumbnailer.transpose()
if t_method == "crop":
- t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
+ return thumbnailer.crop(t_width, t_height, t_type)
elif t_method == "scale":
t_width, t_height = thumbnailer.aspect(t_width, t_height)
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
- t_byte_source = thumbnailer.scale(t_width, t_height, t_type)
- else:
- t_byte_source = None
+ return thumbnailer.scale(t_width, t_height, t_type)
- return t_byte_source
+ return None
async def generate_local_exact_thumbnail(
self,
@@ -776,7 +784,7 @@ class MediaRepository:
return {"width": m_width, "height": m_height}
- async def delete_old_remote_media(self, before_ts):
+ async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
old_media = await self.store.get_remote_media_before(before_ts)
deleted = 0
@@ -928,7 +936,7 @@ class MediaRepositoryResource(Resource):
within a given rectangle.
"""
- def __init__(self, hs):
+ def __init__(self, hs: "HomeServer"):
# If we're not configured to use it, raise if we somehow got here.
if not hs.config.can_load_media_repo:
raise ConfigError("Synapse is not configured to use a media repo.")
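The instance attributes here gain comment-style annotations (`# type: ...`) rather than inline variable annotations. For type checkers the two spellings are equivalent; a standalone sketch, not code from this changeset:

```python
from typing import Set, Tuple

# Comment form, as used in the diff:
recently_accessed_remotes = set()  # type: Set[Tuple[str, str]]

# Inline variable-annotation form, equivalent for mypy on newer interpreters:
recently_accessed_locals: Set[str] = set()
```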
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 268e0c8f50..89cdd605aa 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright 2018 New Vecotr Ltd
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@ import os
import shutil
from typing import IO, TYPE_CHECKING, Any, Optional, Sequence
+from twisted.internet.defer import Deferred
+from twisted.internet.interfaces import IConsumer
from twisted.protocols.basic import FileSender
from synapse.logging.context import defer_to_thread, make_deferred_yieldable
@@ -270,7 +272,7 @@ class MediaStorage:
return self.filepaths.local_media_filepath_rel(file_info.file_id)
-def _write_file_synchronously(source, dest):
+def _write_file_synchronously(source: IO, dest: IO) -> None:
"""Write `source` to the file like `dest` synchronously. Should be called
from a thread.
@@ -286,14 +288,14 @@ class FileResponder(Responder):
"""Wraps an open file that can be sent to a request.
Args:
- open_file (file): A file like object to be streamed ot the client,
+ open_file: A file-like object to be streamed to the client,
is closed when finished streaming.
"""
- def __init__(self, open_file):
+ def __init__(self, open_file: IO):
self.open_file = open_file
- def write_to_consumer(self, consumer):
+ def write_to_consumer(self, consumer: IConsumer) -> Deferred:
return make_deferred_yieldable(
FileSender().beginFileTransfer(self.open_file, consumer)
)
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 1082389d9b..a632099167 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
import datetime
import errno
import fnmatch
@@ -23,12 +23,13 @@ import re
import shutil
import sys
import traceback
-from typing import Dict, Optional
+from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, Optional, Union
from urllib import parse as urlparse
import attr
from twisted.internet.error import DNSLookupError
+from twisted.web.http import Request
from synapse.api.errors import Codes, SynapseError
from synapse.http.client import SimpleHttpClient
@@ -41,6 +42,7 @@ from synapse.http.servlet import parse_integer, parse_string
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.rest.media.v1._base import get_filename_from_headers
+from synapse.rest.media.v1.media_storage import MediaStorage
from synapse.util import json_encoder
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.expiringcache import ExpiringCache
@@ -48,6 +50,12 @@ from synapse.util.stringutils import random_string
from ._base import FileInfo
+if TYPE_CHECKING:
+ from lxml import etree
+
+ from synapse.app.homeserver import HomeServer
+ from synapse.rest.media.v1.media_repository import MediaRepository
+
logger = logging.getLogger(__name__)
_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
@@ -119,7 +127,12 @@ class OEmbedError(Exception):
class PreviewUrlResource(DirectServeJsonResource):
isLeaf = True
- def __init__(self, hs, media_repo, media_storage):
+ def __init__(
+ self,
+ hs: "HomeServer",
+ media_repo: "MediaRepository",
+ media_storage: MediaStorage,
+ ):
super().__init__()
self.auth = hs.get_auth()
@@ -166,11 +179,11 @@ class PreviewUrlResource(DirectServeJsonResource):
self._start_expire_url_cache_data, 10 * 1000
)
- async def _async_render_OPTIONS(self, request):
+ async def _async_render_OPTIONS(self, request: Request) -> None:
request.setHeader(b"Allow", b"OPTIONS, GET")
respond_with_json(request, 200, {}, send_cors=True)
- async def _async_render_GET(self, request):
+ async def _async_render_GET(self, request: Request) -> None:
# XXX: if get_user_by_req fails, what should we do in an async render?
requester = await self.auth.get_user_by_req(request)
@@ -450,7 +463,7 @@ class PreviewUrlResource(DirectServeJsonResource):
logger.warning("Error downloading oEmbed metadata from %s: %r", url, e)
raise OEmbedError() from e
- async def _download_url(self, url: str, user):
+ async def _download_url(self, url: str, user: str) -> Dict[str, Any]:
# TODO: we should probably honour robots.txt... except in practice
# we're most likely being explicitly triggered by a human rather than a
# bot, so are we really a robot?
@@ -580,7 +593,7 @@ class PreviewUrlResource(DirectServeJsonResource):
"expire_url_cache_data", self._expire_url_cache_data
)
- async def _expire_url_cache_data(self):
+ async def _expire_url_cache_data(self) -> None:
"""Clean up expired url cache content, media and thumbnails.
"""
# TODO: Delete from backup media store
@@ -676,7 +689,9 @@ class PreviewUrlResource(DirectServeJsonResource):
logger.debug("No media removed from url cache")
-def decode_and_calc_og(body, media_uri, request_encoding=None) -> Dict[str, str]:
+def decode_and_calc_og(
+ body: bytes, media_uri: str, request_encoding: Optional[str] = None
+) -> Dict[str, Optional[str]]:
# If there's no body, nothing useful is going to be found.
if not body:
return {}
@@ -697,7 +712,7 @@ def decode_and_calc_og(body, media_uri, request_encoding=None) -> Dict[str, str]
return og
-def _calc_og(tree, media_uri):
+def _calc_og(tree, media_uri: str) -> Dict[str, Optional[str]]:
# suck our tree into lxml and define our OG response.
# if we see any image URLs in the OG response, then spider them
@@ -801,7 +816,9 @@ def _calc_og(tree, media_uri):
for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE)
)
og["og:description"] = summarize_paragraphs(text_nodes)
- else:
+ elif og["og:description"]:
+ # This must be a non-empty string at this point.
+ assert isinstance(og["og:description"], str)
og["og:description"] = summarize_paragraphs([og["og:description"]])
# TODO: delete the url downloads to stop diskfilling,
@@ -809,7 +826,9 @@ def _calc_og(tree, media_uri):
return og
-def _iterate_over_text(tree, *tags_to_ignore):
+def _iterate_over_text(
+ tree, *tags_to_ignore: Iterable[Union[str, "etree.Comment"]]
+) -> Generator[str, None, None]:
"""Iterate over the tree returning text nodes in a depth first fashion,
skipping text nodes inside certain tags.
"""
@@ -843,32 +862,32 @@ def _iterate_over_text(tree, *tags_to_ignore):
)
-def _rebase_url(url, base):
- base = list(urlparse.urlparse(base))
- url = list(urlparse.urlparse(url))
- if not url[0]: # fix up schema
- url[0] = base[0] or "http"
- if not url[1]: # fix up hostname
- url[1] = base[1]
- if not url[2].startswith("/"):
- url[2] = re.sub(r"/[^/]+$", "/", base[2]) + url[2]
- return urlparse.urlunparse(url)
+def _rebase_url(url: str, base: str) -> str:
+ base_parts = list(urlparse.urlparse(base))
+ url_parts = list(urlparse.urlparse(url))
+ if not url_parts[0]: # fix up schema
+ url_parts[0] = base_parts[0] or "http"
+ if not url_parts[1]: # fix up hostname
+ url_parts[1] = base_parts[1]
+ if not url_parts[2].startswith("/"):
+ url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
+ return urlparse.urlunparse(url_parts)
-def _is_media(content_type):
- if content_type.lower().startswith("image/"):
- return True
+def _is_media(content_type: str) -> bool:
+ return content_type.lower().startswith("image/")
-def _is_html(content_type):
+def _is_html(content_type: str) -> bool:
content_type = content_type.lower()
- if content_type.startswith("text/html") or content_type.startswith(
+ return content_type.startswith("text/html") or content_type.startswith(
"application/xhtml"
- ):
- return True
+ )
-def summarize_paragraphs(text_nodes, min_size=200, max_size=500):
+def summarize_paragraphs(
+ text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500
+) -> Optional[str]:
# Try to get a summary of between 200 and 500 words, respecting
# first paragraph and then word boundaries.
# TODO: Respect sentences?
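`_rebase_url` resolves possibly-relative image URLs discovered during Open Graph parsing against the page they came from. A sketch of its behaviour on hypothetical URLs:

```python
from synapse.rest.media.v1.preview_url_resource import _rebase_url

# Relative path: scheme and host come from the base, and the base's final
# path component is replaced.
print(_rebase_url("images/cat.png", "http://example.com/a/b.html"))
# -> http://example.com/a/images/cat.png

# Absolute URLs pass through unchanged.
print(_rebase_url("https://cdn.example.com/x.png", "http://example.com/"))
# -> https://cdn.example.com/x.png
```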
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 67f67efde7..e92006faa9 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright 2018 New Vector Ltd
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,10 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import abc
import logging
import os
import shutil
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
from synapse.config._base import Config
from synapse.logging.context import defer_to_thread, run_in_background
@@ -27,13 +28,17 @@ from .media_storage import FileResponder
logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
-class StorageProvider:
+
+class StorageProvider(metaclass=abc.ABCMeta):
"""A storage provider is a service that can store uploaded media and
retrieve them.
"""
- async def store_file(self, path: str, file_info: FileInfo):
+ @abc.abstractmethod
+ async def store_file(self, path: str, file_info: FileInfo) -> None:
"""Store the file described by file_info. The actual contents can be
retrieved by reading the file in file_info.upload_path.
@@ -42,6 +47,7 @@ class StorageProvider:
file_info: The metadata of the file.
"""
+ @abc.abstractmethod
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
"""Attempt to fetch the file described by file_info and stream it
into writer.
@@ -78,10 +84,10 @@ class StorageProviderWrapper(StorageProvider):
self.store_synchronous = store_synchronous
self.store_remote = store_remote
- def __str__(self):
+ def __str__(self) -> str:
return "StorageProviderWrapper[%s]" % (self.backend,)
- async def store_file(self, path, file_info):
+ async def store_file(self, path: str, file_info: FileInfo) -> None:
if not file_info.server_name and not self.store_local:
return None
@@ -91,7 +97,7 @@ class StorageProviderWrapper(StorageProvider):
if self.store_synchronous:
# store_file is supposed to return an Awaitable, but guard
# against improper implementations.
- return await maybe_awaitable(self.backend.store_file(path, file_info))
+ await maybe_awaitable(self.backend.store_file(path, file_info)) # type: ignore
else:
# TODO: Handle errors.
async def store():
@@ -103,9 +109,8 @@ class StorageProviderWrapper(StorageProvider):
logger.exception("Error storing file")
run_in_background(store)
- return None
- async def fetch(self, path, file_info):
+ async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
# store_file is supposed to return an Awaitable, but guard
# against improper implementations.
return await maybe_awaitable(self.backend.fetch(path, file_info))
@@ -115,11 +120,11 @@ class FileStorageProviderBackend(StorageProvider):
"""A storage provider that stores files in a directory on a filesystem.
Args:
- hs (HomeServer)
+ hs
config: The config returned by `parse_config`.
"""
- def __init__(self, hs, config):
+ def __init__(self, hs: "HomeServer", config: str):
self.hs = hs
self.cache_directory = hs.config.media_store_path
self.base_directory = config
@@ -127,7 +132,7 @@ class FileStorageProviderBackend(StorageProvider):
def __str__(self):
return "FileStorageProviderBackend[%s]" % (self.base_directory,)
- async def store_file(self, path, file_info):
+ async def store_file(self, path: str, file_info: FileInfo) -> None:
"""See StorageProvider.store_file"""
primary_fname = os.path.join(self.cache_directory, path)
@@ -137,19 +142,21 @@ class FileStorageProviderBackend(StorageProvider):
if not os.path.exists(dirname):
os.makedirs(dirname)
- return await defer_to_thread(
+ await defer_to_thread(
self.hs.get_reactor(), shutil.copyfile, primary_fname, backup_fname
)
- async def fetch(self, path, file_info):
+ async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
"""See StorageProvider.fetch"""
backup_fname = os.path.join(self.base_directory, path)
if os.path.isfile(backup_fname):
return FileResponder(open(backup_fname, "rb"))
+ return None
+
@staticmethod
- def parse_config(config):
+ def parse_config(config: dict) -> str:
"""Called on startup to parse config supplied. This should parse
the config and raise if there is a problem.
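Making `StorageProvider` an ABC turns a silently-broken backend into a loud failure at construction time. A self-contained sketch of the effect (the `NullStorageProvider` below is hypothetical, and the base class is a simplified stand-in for the real one):

```python
import abc

class StorageProvider(metaclass=abc.ABCMeta):  # simplified stand-in
    @abc.abstractmethod
    async def store_file(self, path, file_info) -> None:
        ...

    @abc.abstractmethod
    async def fetch(self, path, file_info):
        ...

class NullStorageProvider(StorageProvider):
    async def store_file(self, path, file_info) -> None:
        return None
    # fetch() is missing

try:
    NullStorageProvider()
except TypeError as exc:
    print(exc)  # can't instantiate abstract class NullStorageProvider ...
```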
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 30421b663a..d6880f2e6e 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014 - 2016 OpenMarket Ltd
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,10 +16,14 @@
import logging
+from typing import TYPE_CHECKING
+
+from twisted.web.http import Request
from synapse.api.errors import SynapseError
from synapse.http.server import DirectServeJsonResource, set_cors_headers
from synapse.http.servlet import parse_integer, parse_string
+from synapse.rest.media.v1.media_storage import MediaStorage
from ._base import (
FileInfo,
@@ -28,13 +33,22 @@ from ._base import (
respond_with_responder,
)
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
+ from synapse.rest.media.v1.media_repository import MediaRepository
+
logger = logging.getLogger(__name__)
class ThumbnailResource(DirectServeJsonResource):
isLeaf = True
- def __init__(self, hs, media_repo, media_storage):
+ def __init__(
+ self,
+ hs: "HomeServer",
+ media_repo: "MediaRepository",
+ media_storage: MediaStorage,
+ ):
super().__init__()
self.store = hs.get_datastore()
@@ -43,7 +57,7 @@ class ThumbnailResource(DirectServeJsonResource):
self.dynamic_thumbnails = hs.config.dynamic_thumbnails
self.server_name = hs.hostname
- async def _async_render_GET(self, request):
+ async def _async_render_GET(self, request: Request) -> None:
set_cors_headers(request)
server_name, media_id, _ = parse_media_id(request)
width = parse_integer(request, "width", required=True)
@@ -73,8 +87,14 @@ class ThumbnailResource(DirectServeJsonResource):
self.media_repo.mark_recently_accessed(server_name, media_id)
async def _respond_local_thumbnail(
- self, request, media_id, width, height, method, m_type
- ):
+ self,
+ request: Request,
+ media_id: str,
+ width: int,
+ height: int,
+ method: str,
+ m_type: str,
+ ) -> None:
media_info = await self.store.get_local_media(media_id)
if not media_info:
@@ -114,13 +134,13 @@ class ThumbnailResource(DirectServeJsonResource):
async def _select_or_generate_local_thumbnail(
self,
- request,
- media_id,
- desired_width,
- desired_height,
- desired_method,
- desired_type,
- ):
+ request: Request,
+ media_id: str,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ ) -> None:
media_info = await self.store.get_local_media(media_id)
if not media_info:
@@ -178,14 +198,14 @@ class ThumbnailResource(DirectServeJsonResource):
async def _select_or_generate_remote_thumbnail(
self,
- request,
- server_name,
- media_id,
- desired_width,
- desired_height,
- desired_method,
- desired_type,
- ):
+ request: Request,
+ server_name: str,
+ media_id: str,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ ) -> None:
media_info = await self.media_repo.get_remote_media_info(server_name, media_id)
thumbnail_infos = await self.store.get_remote_media_thumbnails(
@@ -239,8 +259,15 @@ class ThumbnailResource(DirectServeJsonResource):
raise SynapseError(400, "Failed to generate thumbnail.")
async def _respond_remote_thumbnail(
- self, request, server_name, media_id, width, height, method, m_type
- ):
+ self,
+ request: Request,
+ server_name: str,
+ media_id: str,
+ width: int,
+ height: int,
+ method: str,
+ m_type: str,
+ ) -> None:
# TODO: Don't download the whole remote file
# We should proxy the thumbnail from the remote server instead of
# downloading the remote file and generating our own thumbnails.
@@ -275,12 +302,12 @@ class ThumbnailResource(DirectServeJsonResource):
def _select_thumbnail(
self,
- desired_width,
- desired_height,
- desired_method,
- desired_type,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
thumbnail_infos,
- ):
+ ) -> dict:
d_w = desired_width
d_h = desired_height
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 32a8e4f960..07903e4017 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +15,7 @@
# limitations under the License.
import logging
from io import BytesIO
+from typing import Tuple
from PIL import Image
@@ -39,7 +41,7 @@ class Thumbnailer:
FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"}
- def __init__(self, input_path):
+ def __init__(self, input_path: str):
try:
self.image = Image.open(input_path)
except OSError as e:
@@ -59,11 +61,11 @@ class Thumbnailer:
# A lot of parsing errors can happen when parsing EXIF
logger.info("Error parsing image EXIF information: %s", e)
- def transpose(self):
+ def transpose(self) -> Tuple[int, int]:
"""Transpose the image using its EXIF Orientation tag
Returns:
- Tuple[int, int]: (width, height) containing the new image size in pixels.
+ A tuple containing the new image size in pixels as (width, height).
"""
if self.transpose_method is not None:
self.image = self.image.transpose(self.transpose_method)
@@ -73,7 +75,7 @@ class Thumbnailer:
self.image.info["exif"] = None
return self.image.size
- def aspect(self, max_width, max_height):
+ def aspect(self, max_width: int, max_height: int) -> Tuple[int, int]:
"""Calculate the largest size that preserves aspect ratio which
fits within the given rectangle::
@@ -91,7 +93,7 @@ class Thumbnailer:
else:
return (max_height * self.width) // self.height, max_height
- def _resize(self, width, height):
+ def _resize(self, width: int, height: int) -> Image:
# 1-bit or 8-bit color palette images need converting to RGB
# otherwise they will be scaled using nearest neighbour which
# looks awful
@@ -99,7 +101,7 @@ class Thumbnailer:
self.image = self.image.convert("RGB")
return self.image.resize((width, height), Image.ANTIALIAS)
- def scale(self, width, height, output_type):
+ def scale(self, width: int, height: int, output_type: str) -> BytesIO:
"""Rescales the image to the given dimensions.
Returns:
@@ -108,7 +110,7 @@ class Thumbnailer:
scaled = self._resize(width, height)
return self._encode_image(scaled, output_type)
- def crop(self, width, height, output_type):
+ def crop(self, width: int, height: int, output_type: str) -> BytesIO:
"""Rescales and crops the image to the given dimensions preserving
aspect::
(w_in / h_in) = (w_scaled / h_scaled)
@@ -136,7 +138,7 @@ class Thumbnailer:
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
return self._encode_image(cropped, output_type)
- def _encode_image(self, output_image, output_type):
+ def _encode_image(self, output_image: Image, output_type: str) -> BytesIO:
output_bytes_io = BytesIO()
fmt = self.FORMATS[output_type]
if fmt == "JPEG":
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 42febc9afc..6da76ae994 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,18 +15,25 @@
# limitations under the License.
import logging
+from typing import TYPE_CHECKING
+
+from twisted.web.http import Request
from synapse.api.errors import Codes, SynapseError
from synapse.http.server import DirectServeJsonResource, respond_with_json
from synapse.http.servlet import parse_string
+if TYPE_CHECKING:
+ from synapse.app.homeserver import HomeServer
+ from synapse.rest.media.v1.media_repository import MediaRepository
+
logger = logging.getLogger(__name__)
class UploadResource(DirectServeJsonResource):
isLeaf = True
- def __init__(self, hs, media_repo):
+ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
super().__init__()
self.media_repo = media_repo
@@ -37,10 +45,10 @@ class UploadResource(DirectServeJsonResource):
self.max_upload_size = hs.config.max_upload_size
self.clock = hs.get_clock()
- async def _async_render_OPTIONS(self, request):
+ async def _async_render_OPTIONS(self, request: Request) -> None:
respond_with_json(request, 200, {}, send_cors=True)
- async def _async_render_POST(self, request):
+ async def _async_render_POST(self, request: Request) -> None:
requester = await self.auth.get_user_by_req(request)
# TODO: The checks here are a bit late. The content will have
# already been uploaded to a tmp file at this point
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 7128dc1742..e46e44ba54 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -16,6 +16,8 @@
import logging
from typing import Dict, List, Optional, Tuple
+import attr
+
from synapse.api.constants import EventContentFields
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.events import make_event_from_dict
@@ -28,6 +30,25 @@ from synapse.types import JsonDict
logger = logging.getLogger(__name__)
+@attr.s(slots=True, frozen=True)
+class _CalculateChainCover:
+ """Return value for _calculate_chain_cover_txn.
+ """
+
+ # The last room_id/depth/stream processed.
+ room_id = attr.ib(type=str)
+ depth = attr.ib(type=int)
+ stream = attr.ib(type=int)
+
+ # Number of rows processed
+ processed_count = attr.ib(type=int)
+
+ # Map from room_id to last depth/stream processed for each room that we have
+ # processed all events for (i.e. the rooms we can flip the
+ # `has_auth_chain_index` for)
+ finished_room_map = attr.ib(type=Dict[str, Tuple[int, int]])
+
+
class EventsBackgroundUpdatesStore(SQLBaseStore):
EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
@@ -719,138 +740,29 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
current_room_id = progress.get("current_room_id", "")
- # Have we finished processing the current room.
- finished = progress.get("finished", True)
-
# Where we've processed up to in the room, defaults to the start of the
# room.
last_depth = progress.get("last_depth", -1)
last_stream = progress.get("last_stream", -1)
- # Have we set the `has_auth_chain_index` for the room yet.
- has_set_room_has_chain_index = progress.get(
- "has_set_room_has_chain_index", False
+ result = await self.db_pool.runInteraction(
+ "_chain_cover_index",
+ self._calculate_chain_cover_txn,
+ current_room_id,
+ last_depth,
+ last_stream,
+ batch_size,
+ single_room=False,
)
- if finished:
- # If we've finished with the previous room (or its our first
- # iteration) we move on to the next room.
-
- def _get_next_room(txn: Cursor) -> Optional[str]:
- sql = """
- SELECT room_id FROM rooms
- WHERE room_id > ?
- AND (
- NOT has_auth_chain_index
- OR has_auth_chain_index IS NULL
- )
- ORDER BY room_id
- LIMIT 1
- """
- txn.execute(sql, (current_room_id,))
- row = txn.fetchone()
- if row:
- return row[0]
+ finished = result.processed_count == 0
- return None
-
- current_room_id = await self.db_pool.runInteraction(
- "_chain_cover_index", _get_next_room
- )
- if not current_room_id:
- await self.db_pool.updates._end_background_update("chain_cover")
- return 0
-
- logger.debug("Adding chain cover to %s", current_room_id)
-
- def _calculate_auth_chain(
- txn: Cursor, last_depth: int, last_stream: int
- ) -> Tuple[int, int, int]:
- # Get the next set of events in the room (that we haven't already
- # computed chain cover for). We do this in topological order.
-
- # We want to do a `(topological_ordering, stream_ordering) > (?,?)`
- # comparison, but that is not supported on older SQLite versions
- tuple_clause, tuple_args = make_tuple_comparison_clause(
- self.database_engine,
- [
- ("topological_ordering", last_depth),
- ("stream_ordering", last_stream),
- ],
- )
+ total_rows_processed = result.processed_count
+ current_room_id = result.room_id
+ last_depth = result.depth
+ last_stream = result.stream
- sql = """
- SELECT
- event_id, state_events.type, state_events.state_key,
- topological_ordering, stream_ordering
- FROM events
- INNER JOIN state_events USING (event_id)
- LEFT JOIN event_auth_chains USING (event_id)
- LEFT JOIN event_auth_chain_to_calculate USING (event_id)
- WHERE events.room_id = ?
- AND event_auth_chains.event_id IS NULL
- AND event_auth_chain_to_calculate.event_id IS NULL
- AND %(tuple_cmp)s
- ORDER BY topological_ordering, stream_ordering
- LIMIT ?
- """ % {
- "tuple_cmp": tuple_clause,
- }
-
- args = [current_room_id]
- args.extend(tuple_args)
- args.append(batch_size)
-
- txn.execute(sql, args)
- rows = txn.fetchall()
-
- # Put the results in the necessary format for
- # `_add_chain_cover_index`
- event_to_room_id = {row[0]: current_room_id for row in rows}
- event_to_types = {row[0]: (row[1], row[2]) for row in rows}
-
- new_last_depth = rows[-1][3] if rows else last_depth # type: int
- new_last_stream = rows[-1][4] if rows else last_stream # type: int
-
- count = len(rows)
-
- # We also need to fetch the auth events for them.
- auth_events = self.db_pool.simple_select_many_txn(
- txn,
- table="event_auth",
- column="event_id",
- iterable=event_to_room_id,
- keyvalues={},
- retcols=("event_id", "auth_id"),
- )
-
- event_to_auth_chain = {} # type: Dict[str, List[str]]
- for row in auth_events:
- event_to_auth_chain.setdefault(row["event_id"], []).append(
- row["auth_id"]
- )
-
- # Calculate and persist the chain cover index for this set of events.
- #
- # Annoyingly we need to gut wrench into the persit event store so that
- # we can reuse the function to calculate the chain cover for rooms.
- PersistEventsStore._add_chain_cover_index(
- txn,
- self.db_pool,
- event_to_room_id,
- event_to_types,
- event_to_auth_chain,
- )
-
- return new_last_depth, new_last_stream, count
-
- last_depth, last_stream, count = await self.db_pool.runInteraction(
- "_chain_cover_index", _calculate_auth_chain, last_depth, last_stream
- )
-
- total_rows_processed = count
-
- if count < batch_size and not has_set_room_has_chain_index:
+ for room_id, (depth, stream) in result.finished_room_map.items():
# If we've done all the events in the room we flip the
# `has_auth_chain_index` in the DB. Note that its possible for
# further events to be persisted between the above and setting the
@@ -860,42 +772,159 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
await self.db_pool.simple_update(
table="rooms",
- keyvalues={"room_id": current_room_id},
+ keyvalues={"room_id": room_id},
updatevalues={"has_auth_chain_index": True},
desc="_chain_cover_index",
)
- has_set_room_has_chain_index = True
# Handle any events that might have raced with us flipping the
# bit above.
- last_depth, last_stream, count = await self.db_pool.runInteraction(
- "_chain_cover_index", _calculate_auth_chain, last_depth, last_stream
+ result = await self.db_pool.runInteraction(
+ "_chain_cover_index",
+ self._calculate_chain_cover_txn,
+ room_id,
+ depth,
+ stream,
+ batch_size=None,
+ single_room=True,
)
- total_rows_processed += count
+ total_rows_processed += result.processed_count
- # Note that at this point its technically possible that more events
- # than our `batch_size` have been persisted without their chain
- # cover, so we need to continue processing this room if the last
- # count returned was equal to the `batch_size`.
+ if finished:
+ await self.db_pool.updates._end_background_update("chain_cover")
+ return total_rows_processed
- if count < batch_size:
- # We've finished calculating the index for this room, move on to the
- # next room.
- await self.db_pool.updates._background_update_progress(
- "chain_cover", {"current_room_id": current_room_id, "finished": True},
- )
- else:
- # We still have outstanding events to calculate the index for.
- await self.db_pool.updates._background_update_progress(
- "chain_cover",
- {
- "current_room_id": current_room_id,
- "last_depth": last_depth,
- "last_stream": last_stream,
- "has_auth_chain_index": has_set_room_has_chain_index,
- "finished": False,
- },
- )
+ await self.db_pool.updates._background_update_progress(
+ "chain_cover",
+ {
+ "current_room_id": current_room_id,
+ "last_depth": last_depth,
+ "last_stream": last_stream,
+ },
+ )
return total_rows_processed
+
+ def _calculate_chain_cover_txn(
+ self,
+ txn: Cursor,
+ last_room_id: str,
+ last_depth: int,
+ last_stream: int,
+ batch_size: Optional[int],
+ single_room: bool,
+ ) -> _CalculateChainCover:
+ """Calculate the chain cover for `batch_size` events, ordered by
+ `(room_id, depth, stream)`.
+
+ Args:
+ txn
+ last_room_id, last_depth, last_stream: The `(room_id, depth, stream)`
+ tuple to fetch results after.
+ batch_size: The maximum number of events to process. If None then
+ no limit.
+ single_room: Whether to calculate the index for just the given
+ room.
+ """
+
+ # Get the next set of events in the room (that we haven't already
+ # computed chain cover for). We do this in topological order.
+
+ # We want to do a `(topological_ordering, stream_ordering) > (?,?)`
+ # comparison, but that is not supported on older SQLite versions
+ tuple_clause, tuple_args = make_tuple_comparison_clause(
+ self.database_engine,
+ [
+ ("events.room_id", last_room_id),
+ ("topological_ordering", last_depth),
+ ("stream_ordering", last_stream),
+ ],
+ )
+
+ extra_clause = ""
+ if single_room:
+ extra_clause = "AND events.room_id = ?"
+ tuple_args.append(last_room_id)
+
+ sql = """
+ SELECT
+ event_id, state_events.type, state_events.state_key,
+ topological_ordering, stream_ordering,
+ events.room_id
+ FROM events
+ INNER JOIN state_events USING (event_id)
+ LEFT JOIN event_auth_chains USING (event_id)
+ LEFT JOIN event_auth_chain_to_calculate USING (event_id)
+ WHERE event_auth_chains.event_id IS NULL
+ AND event_auth_chain_to_calculate.event_id IS NULL
+ AND %(tuple_cmp)s
+ %(extra)s
+ ORDER BY events.room_id, topological_ordering, stream_ordering
+ %(limit)s
+ """ % {
+ "tuple_cmp": tuple_clause,
+ "limit": "LIMIT ?" if batch_size is not None else "",
+ "extra": extra_clause,
+ }
+
+ if batch_size is not None:
+ tuple_args.append(batch_size)
+
+ txn.execute(sql, tuple_args)
+ rows = txn.fetchall()
+
+ # Put the results in the necessary format for
+ # `_add_chain_cover_index`
+ event_to_room_id = {row[0]: row[5] for row in rows}
+ event_to_types = {row[0]: (row[1], row[2]) for row in rows}
+
+ # Calculate the new last position we've processed up to.
+ new_last_depth = rows[-1][3] if rows else last_depth # type: int
+ new_last_stream = rows[-1][4] if rows else last_stream # type: int
+ new_last_room_id = rows[-1][5] if rows else "" # type: str
+
+ # Map from room_id to last depth/stream_ordering processed for the room,
+ # excluding the last room (which we're likely still processing). We also
+ # need to include the room passed in if it's not included in the result
+ # set (as we then know we've processed all events in said room).
+ #
+ # This is the set of rooms that we can now safely flip the
+ # `has_auth_chain_index` bit for.
+ finished_rooms = {
+ row[5]: (row[3], row[4]) for row in rows if row[5] != new_last_room_id
+ }
+ if last_room_id not in finished_rooms and last_room_id != new_last_room_id:
+ finished_rooms[last_room_id] = (last_depth, last_stream)
+
+ count = len(rows)
+
+ # We also need to fetch the auth events for them.
+ auth_events = self.db_pool.simple_select_many_txn(
+ txn,
+ table="event_auth",
+ column="event_id",
+ iterable=event_to_room_id,
+ keyvalues={},
+ retcols=("event_id", "auth_id"),
+ )
+
+ event_to_auth_chain = {} # type: Dict[str, List[str]]
+ for row in auth_events:
+ event_to_auth_chain.setdefault(row["event_id"], []).append(row["auth_id"])
+
+ # Calculate and persist the chain cover index for this set of events.
+ #
+ # Annoyingly we need to gut wrench into the persist event store so that
+ # we can reuse the function to calculate the chain cover for rooms.
+ PersistEventsStore._add_chain_cover_index(
+ txn, self.db_pool, event_to_room_id, event_to_types, event_to_auth_chain,
+ )
+
+ return _CalculateChainCover(
+ room_id=new_last_room_id,
+ depth=new_last_depth,
+ stream=new_last_stream,
+ processed_count=count,
+ finished_room_map=finished_rooms,
+ )
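The refactored update pages through all rooms at once by the composite key `(room_id, depth, stream)`, stopping when a batch comes back empty. A toy sketch of that cursor pattern over hypothetical rows:

```python
# Toy model of the pagination in _calculate_chain_cover_txn; rows are
# hypothetical (room_id, topological_ordering, stream_ordering) triples.
rows = [
    ("!a:hs", 1, 1), ("!a:hs", 1, 2), ("!a:hs", 2, 3),
    ("!b:hs", 1, 4), ("!b:hs", 1, 5),
]

def next_batch(after, batch_size):
    return [r for r in sorted(rows) if r > after][:batch_size]

cursor = ("", -1, -1)  # matches the progress defaults: room "", depth/stream -1
while True:
    batch = next_batch(cursor, 2)
    if not batch:      # processed_count == 0 -> the update is finished
        break
    cursor = batch[-1]
    print(batch)
```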
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index 4b2f224718..283c8a5e22 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -169,7 +170,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
async def get_local_media_before(
self, before_ts: int, size_gt: int, keep_profiles: bool,
- ) -> Optional[List[str]]:
+ ) -> List[str]:
# to find files that have never been accessed (last_access_ts IS NULL)
# compare with `created_ts`
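With the return type narrowed from `Optional[List[str]]` to `List[str]`, callers can iterate the result directly. A hypothetical caller, sketched:

```python
from typing import List

async def count_old_media(store, before_ts: int) -> int:
    # `store` stands in for the media repository store; no None guard is
    # needed now that an empty result is just [].
    media_ids: List[str] = await store.get_local_media_before(
        before_ts, size_gt=0, keep_profiles=True
    )
    return len(media_ids)
```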
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 77ba9d819e..bc7621b8d6 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -17,14 +17,13 @@
import logging
from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple
-from canonicaljson import encode_canonical_json
-
from synapse.push import PusherConfig, ThrottleParams
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.types import Connection
from synapse.storage.util.id_generators import StreamIdGenerator
from synapse.types import JsonDict
+from synapse.util import json_encoder
from synapse.util.caches.descriptors import cached, cachedList
if TYPE_CHECKING:
@@ -315,7 +314,7 @@ class PusherStore(PusherWorkerStore):
"device_display_name": device_display_name,
"ts": pushkey_ts,
"lang": lang,
- "data": bytearray(encode_canonical_json(data)),
+ "data": json_encoder.encode(data),
"last_stream_ordering": last_stream_ordering,
"profile_tag": profile_tag,
"id": stream_id,
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 61d96a6c28..b103c8694c 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -75,3 +75,22 @@ def shortstr(iterable: Iterable, maxitems: int = 5) -> str:
if len(items) <= maxitems:
return str(items)
return "[" + ", ".join(repr(r) for r in items[:maxitems]) + ", ...]"
+
+
+def strtobool(val: str) -> bool:
+ """Convert a string representation of truth to True or False
+
+ True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+ are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if
+ 'val' is anything else.
+
+ This is lifted from distutils.util.strtobool, with the exception that it actually
+ returns a bool, rather than an int.
+ """
+ val = val.lower()
+ if val in ("y", "yes", "t", "true", "on", "1"):
+ return True
+ elif val in ("n", "no", "f", "false", "off", "0"):
+ return False
+ else:
+ raise ValueError("invalid truth value %r" % (val,))
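Usage of the replacement helper, sketched against the definition above:

```python
from synapse.util.stringutils import strtobool

assert strtobool("Yes") is True    # matching is case-insensitive
assert strtobool("off") is False

try:
    strtobool("maybe")
except ValueError as exc:
    print(exc)  # invalid truth value 'maybe'
```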
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index 02e21ed6ca..b3dfa40d25 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -145,7 +145,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
hs = self.setup_test_homeserver(proxied_http_client=self.http_client)
self.handler = hs.get_oidc_handler()
- self.provider = self.handler._provider
+ self.provider = self.handler._providers["oidc"]
sso_handler = hs.get_sso_handler()
# Mock the render error method.
self.render_error = Mock(return_value=None)
@@ -866,7 +866,7 @@ async def _make_callback_with_userinfo(
from synapse.handlers.oidc_handler import OidcSessionData
handler = hs.get_oidc_handler()
- provider = handler._provider
+ provider = handler._providers["oidc"]
provider._exchange_code = simple_async_mock(return_value={})
provider._parse_id_token = simple_async_mock(return_value=userinfo)
provider._fetch_userinfo = simple_async_mock(return_value=userinfo)
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index 4e51839d0f..686012dd25 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -1095,7 +1095,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
# Expire both caches and repeat the request
self.reactor.pump((10000.0,))
- # Repated the request, this time it should fail if the lookup fails.
+ # Repeat the request, this time it should fail if the lookup fails.
fetch_d = defer.ensureDeferred(
self.well_known_resolver.get_well_known(b"testserv")
)
@@ -1130,7 +1130,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
content=b'{ "m.server": "' + (b"a" * WELL_KNOWN_MAX_SIZE) + b'" }',
)
- # The result is sucessful, but disabled delegation.
+ # The result is successful, but disabled delegation.
r = self.successResultOf(fetch_d)
self.assertIsNone(r.delegated_server)
diff --git a/tests/http/test_client.py b/tests/http/test_client.py
new file mode 100644
index 0000000000..f17c122e93
--- /dev/null
+++ b/tests/http/test_client.py
@@ -0,0 +1,101 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from io import BytesIO
+
+from mock import Mock
+
+from twisted.python.failure import Failure
+from twisted.web.client import ResponseDone
+
+from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size
+
+from tests.unittest import TestCase
+
+
+class ReadBodyWithMaxSizeTests(TestCase):
+ def setUp(self):
+ """Start reading the body, returns the response, result and proto"""
+ self.response = Mock()
+ self.result = BytesIO()
+ self.deferred = read_body_with_max_size(self.response, self.result, 6)
+
+ # Fish the protocol out of the response.
+ self.protocol = self.response.deliverBody.call_args[0][0]
+ self.protocol.transport = Mock()
+
+ def _cleanup_error(self):
+ """Ensure that the error in the Deferred is handled gracefully."""
+ called = [False]
+
+ def errback(f):
+ called[0] = True
+
+ self.deferred.addErrback(errback)
+ self.assertTrue(called[0])
+
+ def test_no_error(self):
+ """A response that is NOT too large."""
+
+ # Start sending data.
+ self.protocol.dataReceived(b"12345")
+ # Close the connection.
+ self.protocol.connectionLost(Failure(ResponseDone()))
+
+ self.assertEqual(self.result.getvalue(), b"12345")
+ self.assertEqual(self.deferred.result, 5)
+
+ def test_too_large(self):
+ """A response which is too large raises an exception."""
+
+ # Start sending data.
+ self.protocol.dataReceived(b"1234567890")
+ # Close the connection.
+ self.protocol.connectionLost(Failure(ResponseDone()))
+
+ self.assertEqual(self.result.getvalue(), b"1234567890")
+ self.assertIsInstance(self.deferred.result, Failure)
+ self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize)
+ self._cleanup_error()
+
+ def test_multiple_packets(self):
+ """Data should be accummulated through mutliple packets."""
+
+ # Start sending data.
+ self.protocol.dataReceived(b"12")
+ self.protocol.dataReceived(b"34")
+ # Close the connection.
+ self.protocol.connectionLost(Failure(ResponseDone()))
+
+ self.assertEqual(self.result.getvalue(), b"1234")
+ self.assertEqual(self.deferred.result, 4)
+
+ def test_additional_data(self):
+ """A connection can receive data after being closed."""
+
+ # Start sending data.
+ self.protocol.dataReceived(b"1234567890")
+ self.assertIsInstance(self.deferred.result, Failure)
+ self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize)
+ self.protocol.transport.loseConnection.assert_called_once()
+
+ # More data might have come in.
+ self.protocol.dataReceived(b"1234567890")
+ # Close the connection.
+ self.protocol.connectionLost(Failure(ResponseDone()))
+
+ self.assertEqual(self.result.getvalue(), b"1234567890")
+ self.assertIsInstance(self.deferred.result, Failure)
+ self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize)
+ self._cleanup_error()
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py
index 586b877bda..9d22c04073 100644
--- a/tests/rest/admin/test_admin.py
+++ b/tests/rest/admin/test_admin.py
@@ -153,8 +153,6 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase):
]
def prepare(self, reactor, clock, hs):
- self.store = hs.get_datastore()
-
# Allow for uploading and downloading to/from the media repo
self.media_repo = hs.get_media_repository_resource()
self.download_resource = self.media_repo.children[b"download"]
@@ -428,7 +426,11 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase):
# Mark the second item as safe from quarantine.
_, media_id_2 = server_and_media_id_2.split("/")
- self.get_success(self.store.mark_local_media_as_safe(media_id_2))
+ # Protect the media from being quarantined
+ url = "/_synapse/admin/v1/media/protect/%s" % (urllib.parse.quote(media_id_2),)
+ channel = self.make_request("POST", url, access_token=admin_user_tok)
+ self.pump(1.0)
+ self.assertEqual(200, int(channel.code), msg=channel.result["body"])
# Quarantine all media by this user
url = "/_synapse/admin/v1/user/%s/media/quarantine" % urllib.parse.quote(
diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index 3e8661f9b9..a6488a3d29 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -475,7 +475,9 @@ class UIAuthTests(unittest.HomeserverTestCase):
session_id = channel.json_body["session"]
# do the OIDC auth, but auth as the wrong user
- channel = self.helper.auth_via_oidc("wrong_user", ui_auth_session_id=session_id)
+ channel = self.helper.auth_via_oidc(
+ {"sub": "wrong_user"}, ui_auth_session_id=session_id
+ )
# that should return a failure message
self.assertSubstring("We were unable to validate", channel.text_body)
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index ff67a73749..0c46ad595b 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from typing import Dict, List, Tuple
+from typing import Dict, List, Set, Tuple
from twisted.trial import unittest
@@ -483,22 +483,20 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
login.register_servlets,
]
- def test_background_update(self):
- """Test that the background update to calculate auth chains for historic
- rooms works correctly.
- """
-
- # Create a room
- user_id = self.register_user("foo", "pass")
- token = self.login("foo", "pass")
- room_id = self.helper.create_room_as(user_id, tok=token)
- requester = create_requester(user_id)
+ def prepare(self, reactor, clock, hs):
+ self.store = hs.get_datastore()
+ self.user_id = self.register_user("foo", "pass")
+ self.token = self.login("foo", "pass")
+ self.requester = create_requester(self.user_id)
- store = self.hs.get_datastore()
+ def _generate_room(self) -> Tuple[str, List[Set[str]]]:
+        """Create a room without a chain cover index.
+
+        Returns the room ID and the state sets from two forked state events.
+        """
+ room_id = self.helper.create_room_as(self.user_id, tok=self.token)
# Mark the room as not having a chain cover index
self.get_success(
- store.db_pool.simple_update(
+ self.store.db_pool.simple_update(
table="rooms",
keyvalues={"room_id": room_id},
updatevalues={"has_auth_chain_index": False},
@@ -508,42 +506,44 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
# Create a fork in the DAG with different events.
event_handler = self.hs.get_event_creation_handler()
- latest_event_ids = self.get_success(store.get_prev_events_for_room(room_id))
+ latest_event_ids = self.get_success(
+ self.store.get_prev_events_for_room(room_id)
+ )
event, context = self.get_success(
event_handler.create_event(
- requester,
+ self.requester,
{
"type": "some_state_type",
"state_key": "",
"content": {},
"room_id": room_id,
- "sender": user_id,
+ "sender": self.user_id,
},
prev_event_ids=latest_event_ids,
)
)
self.get_success(
- event_handler.handle_new_client_event(requester, event, context)
+ event_handler.handle_new_client_event(self.requester, event, context)
)
- state1 = list(self.get_success(context.get_current_state_ids()).values())
+ state1 = set(self.get_success(context.get_current_state_ids()).values())
event, context = self.get_success(
event_handler.create_event(
- requester,
+ self.requester,
{
"type": "some_state_type",
"state_key": "",
"content": {},
"room_id": room_id,
- "sender": user_id,
+ "sender": self.user_id,
},
prev_event_ids=latest_event_ids,
)
)
self.get_success(
- event_handler.handle_new_client_event(requester, event, context)
+ event_handler.handle_new_client_event(self.requester, event, context)
)
- state2 = list(self.get_success(context.get_current_state_ids()).values())
+ state2 = set(self.get_success(context.get_current_state_ids()).values())
# Delete the chain cover info.
@@ -551,36 +551,191 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
txn.execute("DELETE FROM event_auth_chains")
txn.execute("DELETE FROM event_auth_chain_links")
- self.get_success(store.db_pool.runInteraction("test", _delete_tables))
+ self.get_success(self.store.db_pool.runInteraction("test", _delete_tables))
+
+ return room_id, [state1, state2]
+
+ def test_background_update_single_room(self):
+        """Test that the background update to calculate auth chains for a
+        single historic room works correctly.
+        """
+
+ # Create a room
+ room_id, states = self._generate_room()
# Insert and run the background update.
self.get_success(
- store.db_pool.simple_insert(
+ self.store.db_pool.simple_insert(
"background_updates",
{"update_name": "chain_cover", "progress_json": "{}"},
)
)
# Ugh, have to reset this flag
- store.db_pool.updates._all_done = False
+ self.store.db_pool.updates._all_done = False
while not self.get_success(
- store.db_pool.updates.has_completed_background_updates()
+ self.store.db_pool.updates.has_completed_background_updates()
):
self.get_success(
- store.db_pool.updates.do_next_background_update(100), by=0.1
+ self.store.db_pool.updates.do_next_background_update(100), by=0.1
)
# Test that the `has_auth_chain_index` has been set
- self.assertTrue(self.get_success(store.has_auth_chain_index(room_id)))
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id)))
# Test that calculating the auth chain difference using the newly
# calculated chain cover works.
self.get_success(
- store.db_pool.runInteraction(
+ self.store.db_pool.runInteraction(
"test",
- store._get_auth_chain_difference_using_cover_index_txn,
+ self.store._get_auth_chain_difference_using_cover_index_txn,
room_id,
- [state1, state2],
+ states,
+ )
+ )
+
+ def test_background_update_multiple_rooms(self):
+        """Test that the background update to calculate auth chains works
+        correctly across multiple historic rooms.
+        """
+        # Create the rooms
+        room_id1, states1 = self._generate_room()
+        room_id2, _ = self._generate_room()
+        room_id3, _ = self._generate_room()
+
+ # Insert and run the background update.
+ self.get_success(
+ self.store.db_pool.simple_insert(
+ "background_updates",
+ {"update_name": "chain_cover", "progress_json": "{}"},
+ )
+ )
+
+ # Ugh, have to reset this flag
+ self.store.db_pool.updates._all_done = False
+
+ while not self.get_success(
+ self.store.db_pool.updates.has_completed_background_updates()
+ ):
+ self.get_success(
+ self.store.db_pool.updates.do_next_background_update(100), by=0.1
+ )
+
+ # Test that the `has_auth_chain_index` has been set
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id1)))
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id2)))
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id3)))
+
+ # Test that calculating the auth chain difference using the newly
+ # calculated chain cover works.
+ self.get_success(
+ self.store.db_pool.runInteraction(
+ "test",
+ self.store._get_auth_chain_difference_using_cover_index_txn,
+ room_id1,
+ states1,
)
)
+
+ def test_background_update_single_large_room(self):
+        """Test that the background update to calculate auth chains works
+        correctly for a room large enough to need multiple iterations.
+        """
+
+ # Create a room
+ room_id, states = self._generate_room()
+
+ # Add a bunch of state so that it takes multiple iterations of the
+ # background update to process the room.
+ for i in range(0, 150):
+ self.helper.send_state(
+ room_id, event_type="m.test", body={"index": i}, tok=self.token
+ )
+
+ # Insert and run the background update.
+ self.get_success(
+ self.store.db_pool.simple_insert(
+ "background_updates",
+ {"update_name": "chain_cover", "progress_json": "{}"},
+ )
+ )
+
+ # Ugh, have to reset this flag
+ self.store.db_pool.updates._all_done = False
+
+ iterations = 0
+ while not self.get_success(
+ self.store.db_pool.updates.has_completed_background_updates()
+ ):
+ iterations += 1
+ self.get_success(
+ self.store.db_pool.updates.do_next_background_update(100), by=0.1
+ )
+
+ # Ensure that we did actually take multiple iterations to process the
+ # room.
+ self.assertGreater(iterations, 1)
+
+ # Test that the `has_auth_chain_index` has been set
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id)))
+
+ # Test that calculating the auth chain difference using the newly
+ # calculated chain cover works.
+ self.get_success(
+ self.store.db_pool.runInteraction(
+ "test",
+ self.store._get_auth_chain_difference_using_cover_index_txn,
+ room_id,
+ states,
+ )
+ )
+
+ def test_background_update_multiple_large_room(self):
+        """Test that the background update to calculate auth chains works
+        correctly for multiple rooms that each need multiple iterations.
+        """
+
+ # Create the rooms
+ room_id1, _ = self._generate_room()
+ room_id2, _ = self._generate_room()
+
+        # Add a bunch of state so that it takes multiple iterations of the
+        # background update to process each room.
+ for i in range(0, 150):
+ self.helper.send_state(
+ room_id1, event_type="m.test", body={"index": i}, tok=self.token
+ )
+
+ for i in range(0, 150):
+ self.helper.send_state(
+ room_id2, event_type="m.test", body={"index": i}, tok=self.token
+ )
+
+ # Insert and run the background update.
+ self.get_success(
+ self.store.db_pool.simple_insert(
+ "background_updates",
+ {"update_name": "chain_cover", "progress_json": "{}"},
+ )
+ )
+
+ # Ugh, have to reset this flag
+ self.store.db_pool.updates._all_done = False
+
+ iterations = 0
+ while not self.get_success(
+ self.store.db_pool.updates.has_completed_background_updates()
+ ):
+ iterations += 1
+ self.get_success(
+ self.store.db_pool.updates.do_next_background_update(100), by=0.1
+ )
+
+        # Ensure that we did actually take multiple iterations to process the
+        # rooms.
+ self.assertGreater(iterations, 1)
+
+ # Test that the `has_auth_chain_index` has been set
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id1)))
+ self.assertTrue(self.get_success(self.store.has_auth_chain_index(room_id2)))
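
All four tests above repeat the same register-and-drain boilerplate for the `chain_cover` background update. A sketch of how that could be folded into a shared method on `EventChainBackgroundUpdateTestCase` is below; the helper name is invented, but every call in the body already appears verbatim in the tests above.

```python
def _run_background_update(self, update_name: str = "chain_cover") -> int:
    """Register `update_name` and run it to completion.

    Returns the number of iterations taken, so callers can assert that a
    large room genuinely needed more than one pass.
    """
    # Register the update with the background updater.
    self.get_success(
        self.store.db_pool.simple_insert(
            "background_updates",
            {"update_name": update_name, "progress_json": "{}"},
        )
    )

    # Reset the "all done" flag so the updater notices the new row.
    self.store.db_pool.updates._all_done = False

    iterations = 0
    while not self.get_success(
        self.store.db_pool.updates.has_completed_background_updates()
    ):
        iterations += 1
        self.get_success(
            self.store.db_pool.updates.do_next_background_update(100), by=0.1
        )
    return iterations
```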