-rw-r--r--  CHANGES.md                                           |   6
-rw-r--r--  changelog.d/8595.misc                                |   1
-rw-r--r--  changelog.d/8635.doc                                 |   1
-rw-r--r--  changelog.d/8676.bugfix                              |   1
-rw-r--r--  changelog.d/8678.bugfix                              |   1
-rw-r--r--  changelog.d/8682.bugfix                              |   1
-rw-r--r--  changelog.d/8688.misc                                |   1
-rw-r--r--  changelog.d/8690.misc                                |   1
-rw-r--r--  docs/sample_config.yaml                              | 126
-rw-r--r--  synapse/__init__.py                                  |   2
-rw-r--r--  synapse/config/cas.py                                |  46
-rw-r--r--  synapse/config/oidc_config.py                        |   7
-rw-r--r--  synapse/config/saml2_config.py                       |  81
-rw-r--r--  synapse/push/bulk_push_rule_evaluator.py             |  37
-rw-r--r--  synapse/rest/media/v1/media_repository.py            | 165
-rw-r--r--  synapse/rest/media/v1/media_storage.py               |  30
-rw-r--r--  synapse/storage/databases/main/events_worker.py      |   2
-rw-r--r--  synapse/storage/databases/main/media_repository.py   |  27
-rw-r--r--  synapse/util/caches/descriptors.py                   | 235
-rw-r--r--  tests/replication/test_multi_media_repo.py           | 277
-rw-r--r--  tests/server.py                                      |   2
-rw-r--r--  tests/test_utils/__init__.py                         |  34
-rw-r--r--  tests/unittest.py                                    |   6
-rw-r--r--  tests/util/caches/test_descriptors.py                |  60
24 files changed, 905 insertions, 245 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 58973d2a5d..8bd7825089 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,11 +1,11 @@
-Synapse 1.22.0 (2020-10-30)
+Synapse 1.22.1 (2020-10-30)
 ===========================

 Bugfixes
 --------

-- Fix a bug where an appservice may not be forwarded events for a room it was recently invited to. Broken in v1.22.0. ([\#8676](https://github.com/matrix-org/synapse/issues/8676))
-- Fix `Object of type frozendict is not JSON serializable` exceptions when using third-party event rules. ([\#8678](https://github.com/matrix-org/synapse/issues/8678))
+- Fix a bug where an appservice may not be forwarded events for a room it was recently invited to. Broke in v1.22.0. ([\#8676](https://github.com/matrix-org/synapse/issues/8676))
+- Fix `Object of type frozendict is not JSON serializable` exceptions when using third-party event rules. Broke in v1.22.0. ([\#8678](https://github.com/matrix-org/synapse/issues/8678))


 Synapse 1.22.0 (2020-10-27)
diff --git a/changelog.d/8595.misc b/changelog.d/8595.misc
new file mode 100644
index 0000000000..24fab65cda
--- /dev/null
+++ b/changelog.d/8595.misc
@@ -0,0 +1 @@
+Implement and use an @lru_cache decorator.
diff --git a/changelog.d/8635.doc b/changelog.d/8635.doc
new file mode 100644
index 0000000000..00fb1e61a7
--- /dev/null
+++ b/changelog.d/8635.doc
@@ -0,0 +1 @@
+Improve the sample configuration for single sign-on providers.
diff --git a/changelog.d/8676.bugfix b/changelog.d/8676.bugfix
deleted file mode 100644
index df16c72761..0000000000
--- a/changelog.d/8676.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug where an appservice may not be forwarded events for a room it was recently invited to. Broken in v1.22.0.
diff --git a/changelog.d/8678.bugfix b/changelog.d/8678.bugfix
deleted file mode 100644
index 0508d8f109..0000000000
--- a/changelog.d/8678.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix `Object of type frozendict is not JSON serializable` exceptions when using third-party event rules.
diff --git a/changelog.d/8682.bugfix b/changelog.d/8682.bugfix
new file mode 100644
index 0000000000..e61276aa05
--- /dev/null
+++ b/changelog.d/8682.bugfix
@@ -0,0 +1 @@
+Fix exception during handling multiple concurrent requests for remote media when using multiple media repositories.
diff --git a/changelog.d/8688.misc b/changelog.d/8688.misc
new file mode 100644
index 0000000000..bef8dc425a
--- /dev/null
+++ b/changelog.d/8688.misc
@@ -0,0 +1 @@
+Abstract some invite-related code in preparation for landing knocking.
\ No newline at end of file
diff --git a/changelog.d/8690.misc b/changelog.d/8690.misc
new file mode 100644
index 0000000000..0f38ba1f5d
--- /dev/null
+++ b/changelog.d/8690.misc
@@ -0,0 +1 @@
+Fail tests if they do not await coroutines.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index de4ad50458..6cd95707af 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml
@@ -1683,10 +1683,8 @@ trusted_key_servers: ## Single sign-on integration ## -# Enable SAML2 for registration and login. Uses pysaml2. -# -# At least one of `sp_config` or `config_path` must be set in this section to -# enable SAML login. +# The following settings can be used to make Synapse use a single sign-on +# provider for authentication, instead of its internal password database. # # You will probably also want to set the following options to `false` to # disable the regular login/registration flows: @@ -1695,6 +1693,11 @@ trusted_key_servers: # # You will also want to investigate the settings under the "sso" configuration # section below. + +# Enable SAML2 for registration and login. Uses pysaml2. +# +# At least one of `sp_config` or `config_path` must be set in this section to +# enable SAML login. # # Once SAML support is enabled, a metadata file will be exposed at # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to @@ -1710,40 +1713,42 @@ saml2_config: # so it is not normally necessary to specify them unless you need to # override them. # - #sp_config: - # # point this to the IdP's metadata. You can use either a local file or - # # (preferably) a URL. - # metadata: - # #local: ["saml2/idp.xml"] - # remote: - # - url: https://our_idp/metadata.xml - # - # # By default, the user has to go to our login page first. If you'd like - # # to allow IdP-initiated login, set 'allow_unsolicited: true' in a - # # 'service.sp' section: - # # - # #service: - # # sp: - # # allow_unsolicited: true - # - # # The examples below are just used to generate our metadata xml, and you - # # may well not need them, depending on your setup. Alternatively you - # # may need a whole lot more detail - see the pysaml2 docs! - # - # description: ["My awesome SP", "en"] - # name: ["Test SP", "en"] - # - # organization: - # name: Example com - # display_name: - # - ["Example co", "en"] - # url: "http://example.com" - # - # contact_person: - # - given_name: Bob - # sur_name: "the Sysadmin" - # email_address": ["admin@example.com"] - # contact_type": technical + sp_config: + # Point this to the IdP's metadata. You must provide either a local + # file via the `local` attribute or (preferably) a URL via the + # `remote` attribute. + # + #metadata: + # local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + + # By default, the user has to go to our login page first. If you'd like + # to allow IdP-initiated login, set 'allow_unsolicited: true' in a + # 'service.sp' section: + # + #service: + # sp: + # allow_unsolicited: true + + # The examples below are just used to generate our metadata xml, and you + # may well not need them, depending on your setup. Alternatively you + # may need a whole lot more detail - see the pysaml2 docs! + + #description: ["My awesome SP", "en"] + #name: ["Test SP", "en"] + + #organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + + #contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical # Instead of putting the config inline as above, you can specify a # separate pysaml2 configuration file: @@ -1819,11 +1824,10 @@ saml2_config: # value: "sales" -# OpenID Connect integration. The following settings can be used to make Synapse -# use an OpenID Connect Provider for authentication, instead of its internal -# password database. +# Enable OpenID Connect (OIDC) / OAuth 2.0 for registration and login. 
# -# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md. +# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md +# for some example configurations. # oidc_config: # Uncomment the following to enable authorization against an OpenID Connect @@ -1956,15 +1960,37 @@ oidc_config: -# Enable CAS for registration and login. +# Enable Central Authentication Service (CAS) for registration and login. # -#cas_config: -# enabled: true -# server_url: "https://cas-server.com" -# service_url: "https://homeserver.domain.com:8448" -# #displayname_attribute: name -# #required_attributes: -# # name: value +cas_config: + # Uncomment the following to enable authorization against a CAS server. + # Defaults to false. + # + #enabled: true + + # The URL of the CAS authorization endpoint. + # + #server_url: "https://cas-server.com" + + # The public URL of the homeserver. + # + #service_url: "https://homeserver.domain.com:8448" + + # The attribute of the CAS response to use as the display name. + # + # If unset, no displayname will be set. + # + #displayname_attribute: name + + # It is possible to configure Synapse to only allow logins if CAS attributes + # match particular values. All of the keys in the mapping below must exist + # and the values must match the given value. Alternately if the given value + # is None then any value is allowed (the attribute just must exist). + # All of the listed attributes must match for the login to be permitted. + # + #required_attributes: + # userGroup: "staff" + # department: None # Additional settings to use with single-sign on systems such as OpenID Connect, diff --git a/synapse/__init__.py b/synapse/__init__.py
index 35a82c0bfe..3e1df2b035 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -48,7 +48,7 @@ try:
 except ImportError:
     pass

-__version__ = "1.22.0"
+__version__ = "1.22.1"

 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index 4526c1a67b..2f97e6d258 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py
@@ -26,14 +26,14 @@ class CasConfig(Config): def read_config(self, config, **kwargs): cas_config = config.get("cas_config", None) - if cas_config: - self.cas_enabled = cas_config.get("enabled", True) + self.cas_enabled = cas_config and cas_config.get("enabled", True) + + if self.cas_enabled: self.cas_server_url = cas_config["server_url"] self.cas_service_url = cas_config["service_url"] self.cas_displayname_attribute = cas_config.get("displayname_attribute") - self.cas_required_attributes = cas_config.get("required_attributes", {}) + self.cas_required_attributes = cas_config.get("required_attributes") or {} else: - self.cas_enabled = False self.cas_server_url = None self.cas_service_url = None self.cas_displayname_attribute = None @@ -41,13 +41,35 @@ class CasConfig(Config): def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ - # Enable CAS for registration and login. + # Enable Central Authentication Service (CAS) for registration and login. # - #cas_config: - # enabled: true - # server_url: "https://cas-server.com" - # service_url: "https://homeserver.domain.com:8448" - # #displayname_attribute: name - # #required_attributes: - # # name: value + cas_config: + # Uncomment the following to enable authorization against a CAS server. + # Defaults to false. + # + #enabled: true + + # The URL of the CAS authorization endpoint. + # + #server_url: "https://cas-server.com" + + # The public URL of the homeserver. + # + #service_url: "https://homeserver.domain.com:8448" + + # The attribute of the CAS response to use as the display name. + # + # If unset, no displayname will be set. + # + #displayname_attribute: name + + # It is possible to configure Synapse to only allow logins if CAS attributes + # match particular values. All of the keys in the mapping below must exist + # and the values must match the given value. Alternately if the given value + # is None then any value is allowed (the attribute just must exist). + # All of the listed attributes must match for the login to be permitted. + # + #required_attributes: + # userGroup: "staff" + # department: None """ diff --git a/synapse/config/oidc_config.py b/synapse/config/oidc_config.py
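The `required_attributes` rule documented in the new CAS config section above — every configured key must be present in the CAS response, and must equal the configured value unless that value is `None` — can be illustrated with a short sketch. This is a hypothetical standalone helper written for illustration, not the code Synapse itself uses to enforce the check:

```python
def cas_attributes_permit_login(required_attributes: dict, response_attributes: dict) -> bool:
    # Every configured attribute must exist in the CAS response; a configured
    # value of None means "any value is acceptable, as long as it is present".
    for name, required_value in required_attributes.items():
        if name not in response_attributes:
            return False
        if required_value is not None and response_attributes[name] != required_value:
            return False
    return True


# With the example from the config comment above,
# {"userGroup": "staff", "department": None}:
assert cas_attributes_permit_login(
    {"userGroup": "staff", "department": None},
    {"userGroup": "staff", "department": "sales"},
)
# Rejected: wrong userGroup value and no department attribute at all.
assert not cas_attributes_permit_login(
    {"userGroup": "staff", "department": None}, {"userGroup": "sales"}
)
```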
index 7597fbc864..69d188341c 100644
--- a/synapse/config/oidc_config.py
+++ b/synapse/config/oidc_config.py
@@ -87,11 +87,10 @@ class OIDCConfig(Config):

     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """\
-        # OpenID Connect integration. The following settings can be used to make Synapse
-        # use an OpenID Connect Provider for authentication, instead of its internal
-        # password database.
+        # Enable OpenID Connect (OIDC) / OAuth 2.0 for registration and login.
         #
-        # See https://github.com/matrix-org/synapse/blob/master/docs/openid.md.
+        # See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
+        # for some example configurations.
         #
         oidc_config:
           # Uncomment the following to enable authorization against an OpenID Connect
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 99aa8b3bf1..778750f43b 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py
@@ -216,10 +216,8 @@ class SAML2Config(Config): return """\ ## Single sign-on integration ## - # Enable SAML2 for registration and login. Uses pysaml2. - # - # At least one of `sp_config` or `config_path` must be set in this section to - # enable SAML login. + # The following settings can be used to make Synapse use a single sign-on + # provider for authentication, instead of its internal password database. # # You will probably also want to set the following options to `false` to # disable the regular login/registration flows: @@ -228,6 +226,11 @@ class SAML2Config(Config): # # You will also want to investigate the settings under the "sso" configuration # section below. + + # Enable SAML2 for registration and login. Uses pysaml2. + # + # At least one of `sp_config` or `config_path` must be set in this section to + # enable SAML login. # # Once SAML support is enabled, a metadata file will be exposed at # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to @@ -243,40 +246,42 @@ class SAML2Config(Config): # so it is not normally necessary to specify them unless you need to # override them. # - #sp_config: - # # point this to the IdP's metadata. You can use either a local file or - # # (preferably) a URL. - # metadata: - # #local: ["saml2/idp.xml"] - # remote: - # - url: https://our_idp/metadata.xml - # - # # By default, the user has to go to our login page first. If you'd like - # # to allow IdP-initiated login, set 'allow_unsolicited: true' in a - # # 'service.sp' section: - # # - # #service: - # # sp: - # # allow_unsolicited: true - # - # # The examples below are just used to generate our metadata xml, and you - # # may well not need them, depending on your setup. Alternatively you - # # may need a whole lot more detail - see the pysaml2 docs! - # - # description: ["My awesome SP", "en"] - # name: ["Test SP", "en"] - # - # organization: - # name: Example com - # display_name: - # - ["Example co", "en"] - # url: "http://example.com" - # - # contact_person: - # - given_name: Bob - # sur_name: "the Sysadmin" - # email_address": ["admin@example.com"] - # contact_type": technical + sp_config: + # Point this to the IdP's metadata. You must provide either a local + # file via the `local` attribute or (preferably) a URL via the + # `remote` attribute. + # + #metadata: + # local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + + # By default, the user has to go to our login page first. If you'd like + # to allow IdP-initiated login, set 'allow_unsolicited: true' in a + # 'service.sp' section: + # + #service: + # sp: + # allow_unsolicited: true + + # The examples below are just used to generate our metadata xml, and you + # may well not need them, depending on your setup. Alternatively you + # may need a whole lot more detail - see the pysaml2 docs! + + #description: ["My awesome SP", "en"] + #name: ["Test SP", "en"] + + #organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + + #contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical # Instead of putting the config inline as above, you can specify a # separate pysaml2 configuration file: diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index d9b5478b53..82a72dc34f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -15,8 +15,8 @@ # limitations under the License. import logging -from collections import namedtuple +import attr from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes @@ -26,7 +26,8 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.util.async_helpers import Linearizer from synapse.util.caches import register_cache -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import lru_cache +from synapse.util.caches.lrucache import LruCache from .push_rule_evaluator import PushRuleEvaluatorForEvent @@ -120,7 +121,7 @@ class BulkPushRuleEvaluator: dict of user_id -> push_rules """ room_id = event.room_id - rules_for_room = await self._get_rules_for_room(room_id) + rules_for_room = self._get_rules_for_room(room_id) rules_by_user = await rules_for_room.get_rules(event, context) @@ -138,7 +139,7 @@ class BulkPushRuleEvaluator: return rules_by_user - @cached() + @lru_cache() def _get_rules_for_room(self, room_id): """Get the current RulesForRoom object for the given room id @@ -275,12 +276,14 @@ class RulesForRoom: the entire cache for the room. """ - def __init__(self, hs, room_id, rules_for_room_cache, room_push_rule_cache_metrics): + def __init__( + self, hs, room_id, rules_for_room_cache: LruCache, room_push_rule_cache_metrics + ): """ Args: hs (HomeServer) room_id (str) - rules_for_room_cache(Cache): The cache object that caches these + rules_for_room_cache: The cache object that caches these RoomsForUser objects. room_push_rule_cache_metrics (CacheMetric) """ @@ -489,13 +492,21 @@ class RulesForRoom: self.state_group = state_group -class _Invalidation(namedtuple("_Invalidation", ("cache", "room_id"))): - # We rely on _CacheContext implementing __eq__ and __hash__ sensibly, - # which namedtuple does for us (i.e. two _CacheContext are the same if - # their caches and keys match). This is important in particular to - # dedupe when we add callbacks to lru cache nodes, otherwise the number - # of callbacks would grow. +@attr.attrs(slots=True, frozen=True) +class _Invalidation: + # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules, + # which means that it it is stored on the bulk_get_push_rules cache entry. In order + # to ensure that we don't accumulate lots of redunant callbacks on the cache entry, + # we need to ensure that two _Invalidation objects are "equal" if they refer to the + # same `cache` and `room_id`. + # + # attrs provides suitable __hash__ and __eq__ methods, provided we remember to + # set `frozen=True`. + + cache = attr.ib(type=LruCache) + room_id = attr.ib(type=str) + def __call__(self): - rules = self.cache.get_immediate(self.room_id, None, update_metrics=False) + rules = self.cache.get(self.room_id, None, update_metrics=False) if rules: rules.invalidate_all() diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
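The `_Invalidation` change above swaps a `namedtuple` for a frozen `attrs` class while keeping value-based equality and hashing, which is what lets duplicate `on_invalidate` callbacks collapse to a single entry on a cache node. A minimal standalone sketch of that property (using `None` in place of a real `LruCache` instance):

```python
import attr


@attr.attrs(slots=True, frozen=True)
class _Invalidation:
    cache = attr.ib()
    room_id = attr.ib(type=str)


a = _Invalidation(cache=None, room_id="!room:example.com")
b = _Invalidation(cache=None, room_id="!room:example.com")

# frozen=True makes attrs generate __eq__ and __hash__ from the field values,
# so two callbacks for the same cache/room compare equal and deduplicate,
# just as the old namedtuple-based version did.
assert a == b
assert len({a, b}) == 1
```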
index 5cce7237a0..9cac74ebd8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py
@@ -305,15 +305,12 @@ class MediaRepository: # file_id is the ID we use to track the file locally. If we've already # seen the file then reuse the existing ID, otherwise genereate a new # one. - if media_info: - file_id = media_info["filesystem_id"] - else: - file_id = random_string(24) - - file_info = FileInfo(server_name, file_id) # If we have an entry in the DB, try and look for it if media_info: + file_id = media_info["filesystem_id"] + file_info = FileInfo(server_name, file_id) + if media_info["quarantined_by"]: logger.info("Media is quarantined") raise NotFoundError() @@ -324,14 +321,34 @@ class MediaRepository: # Failed to find the file anywhere, lets download it. - media_info = await self._download_remote_file(server_name, media_id, file_id) + try: + media_info = await self._download_remote_file(server_name, media_id,) + except SynapseError: + raise + except Exception as e: + # An exception may be because we downloaded media in another + # process, so let's check if we magically have the media. + media_info = await self.store.get_cached_remote_media(server_name, media_id) + if not media_info: + raise e + + file_id = media_info["filesystem_id"] + file_info = FileInfo(server_name, file_id) + + # We generate thumbnails even if another process downloaded the media + # as a) it's conceivable that the other download request dies before it + # generates thumbnails, but mainly b) we want to be sure the thumbnails + # have finished being generated before responding to the client, + # otherwise they'll request thumbnails and get a 404 if they're not + # ready yet. + await self._generate_thumbnails( + server_name, media_id, file_id, media_info["media_type"] + ) responder = await self.media_storage.fetch_media(file_info) return responder, media_info - async def _download_remote_file( - self, server_name: str, media_id: str, file_id: str - ) -> dict: + async def _download_remote_file(self, server_name: str, media_id: str,) -> dict: """Attempt to download the remote file from the given server name, using the given file_id as the local id. @@ -346,6 +363,8 @@ class MediaRepository: The media info of the file. """ + file_id = random_string(24) + file_info = FileInfo(server_name=server_name, file_id=file_id) with self.media_storage.store_into_file(file_info) as (f, fname, finish): @@ -401,22 +420,32 @@ class MediaRepository: await finish() - media_type = headers[b"Content-Type"][0].decode("ascii") - upload_name = get_filename_from_headers(headers) - time_now_ms = self.clock.time_msec() + media_type = headers[b"Content-Type"][0].decode("ascii") + upload_name = get_filename_from_headers(headers) + time_now_ms = self.clock.time_msec() + + # Multiple remote media download requests can race (when using + # multiple media repos), so this may throw a violation constraint + # exception. If it does we'll delete the newly downloaded file from + # disk (as we're in the ctx manager). + # + # However: we've already called `finish()` so we may have also + # written to the storage providers. This is preferable to the + # alternative where we call `finish()` *after* this, where we could + # end up having an entry in the DB but fail to write the files to + # the storage providers. 
+ await self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) logger.info("Stored remote media in file %r", fname) - await self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=length, - filesystem_id=file_id, - ) - media_info = { "media_type": media_type, "media_length": length, @@ -425,8 +454,6 @@ class MediaRepository: "filesystem_id": file_id, } - await self._generate_thumbnails(server_name, media_id, file_id, media_type) - return media_info def _get_thumbnail_requirements(self, media_type): @@ -692,42 +719,60 @@ class MediaRepository: if not t_byte_source: continue - try: - file_info = FileInfo( - server_name=server_name, - file_id=file_id, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, - url_cache=url_cache, - ) - - output_path = await self.media_storage.store_file( - t_byte_source, file_info - ) - finally: - t_byte_source.close() - - t_len = os.path.getsize(output_path) + file_info = FileInfo( + server_name=server_name, + file_id=file_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + url_cache=url_cache, + ) - # Write to database - if server_name: - await self.store.store_remote_media_thumbnail( - server_name, - media_id, - file_id, - t_width, - t_height, - t_type, - t_method, - t_len, - ) - else: - await self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + try: + await self.media_storage.write_to_file(t_byte_source, f) + await finish() + finally: + t_byte_source.close() + + t_len = os.path.getsize(fname) + + # Write to database + if server_name: + # Multiple remote media download requests can race (when + # using multiple media repos), so this may throw a violation + # constraint exception. If it does we'll delete the newly + # generated thumbnail from disk (as we're in the ctx + # manager). + # + # However: we've already called `finish()` so we may have + # also written to the storage providers. This is preferable + # to the alternative where we call `finish()` *after* this, + # where we could end up having an entry in the DB but fail + # to write the files to the storage providers. + try: + await self.store.store_remote_media_thumbnail( + server_name, + media_id, + file_id, + t_width, + t_height, + t_type, + t_method, + t_len, + ) + except Exception as e: + thumbnail_exists = await self.store.get_remote_media_thumbnail( + server_name, media_id, t_width, t_height, t_type, + ) + if not thumbnail_exists: + raise e + else: + await self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) return {"width": m_width, "height": m_height} diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
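The media-download changes above follow a "try the write, then re-check on failure" pattern so that two media repository workers racing to fetch the same remote file do not fail the client's request. A rough standalone sketch of that shape, with `get_cached_remote_media` and `download` standing in for the real Synapse calls (this is not the actual `MediaRepository` code):

```python
async def get_remote_media(server_name: str, media_id: str, store, download) -> dict:
    # If some worker has already cached the media, use that entry.
    media_info = await store.get_cached_remote_media(server_name, media_id)
    if media_info:
        return media_info

    try:
        # Otherwise download it ourselves; this inserts a row into the
        # remote media cache table.
        return await download(server_name, media_id)
    except Exception:
        # Another worker may have won the race and inserted the row first
        # (surfacing here as a unique-constraint violation). If so, serve
        # its copy instead of failing the request.
        media_info = await store.get_cached_remote_media(server_name, media_id)
        if media_info:
            return media_info
        raise
```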
index a9586fb0b7..268e0c8f50 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py
@@ -52,6 +52,7 @@ class MediaStorage: storage_providers: Sequence["StorageProviderWrapper"], ): self.hs = hs + self.reactor = hs.get_reactor() self.local_media_directory = local_media_directory self.filepaths = filepaths self.storage_providers = storage_providers @@ -70,13 +71,16 @@ class MediaStorage: with self.store_into_file(file_info) as (f, fname, finish_cb): # Write to the main repository - await defer_to_thread( - self.hs.get_reactor(), _write_file_synchronously, source, f - ) + await self.write_to_file(source, f) await finish_cb() return fname + async def write_to_file(self, source: IO, output: IO): + """Asynchronously write the `source` to `output`. + """ + await defer_to_thread(self.reactor, _write_file_synchronously, source, output) + @contextlib.contextmanager def store_into_file(self, file_info: FileInfo): """Context manager used to get a file like object to write into, as @@ -112,14 +116,20 @@ class MediaStorage: finished_called = [False] - async def finish(): - for provider in self.storage_providers: - await provider.store_file(path, file_info) - - finished_called[0] = True - try: with open(fname, "wb") as f: + + async def finish(): + # Ensure that all writes have been flushed and close the + # file. + f.flush() + f.close() + + for provider in self.storage_providers: + await provider.store_file(path, file_info) + + finished_called[0] = True + yield f, fname, finish except Exception: try: @@ -210,7 +220,7 @@ class MediaStorage: if res: with res: consumer = BackgroundFileConsumer( - open(local_path, "wb"), self.hs.get_reactor() + open(local_path, "wb"), self.reactor ) await res.write_to_consumer(consumer) await consumer.wait() diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 5ae263827d..4732685f6e 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -530,7 +530,7 @@ class EventsWorkerStore(SQLBaseStore):
         self,
         context: EventContext,
         state_types_to_include: List[EventTypes],
-        membership_user_id: Optional[str],
+        membership_user_id: Optional[str] = None,
     ) -> List[JsonDict]:
         """
         Retrieve the stripped state from a room, given an event context to retrieve state
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index daf57675d8..4b2f224718 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py
@@ -452,6 +452,33 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="get_remote_media_thumbnails", ) + async def get_remote_media_thumbnail( + self, origin: str, media_id: str, t_width: int, t_height: int, t_type: str, + ) -> Optional[Dict[str, Any]]: + """Fetch the thumbnail info of given width, height and type. + """ + + return await self.db_pool.simple_select_one( + table="remote_media_cache_thumbnails", + keyvalues={ + "media_origin": origin, + "media_id": media_id, + "thumbnail_width": t_width, + "thumbnail_height": t_height, + "thumbnail_type": t_type, + }, + retcols=( + "thumbnail_width", + "thumbnail_height", + "thumbnail_method", + "thumbnail_type", + "thumbnail_length", + "filesystem_id", + ), + allow_none=True, + desc="get_remote_media_thumbnail", + ) + async def store_remote_media_thumbnail( self, origin, diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 5d7fffee66..a924140cdf 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py
@@ -13,10 +13,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import enum import functools import inspect import logging -from typing import Any, Callable, Generic, Optional, Tuple, TypeVar, Union, cast +from typing import ( + Any, + Callable, + Generic, + Iterable, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, +) from weakref import WeakValueDictionary from twisted.internet import defer @@ -24,6 +37,7 @@ from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable, preserve_fn from synapse.util import unwrapFirstError from synapse.util.caches.deferred_cache import DeferredCache +from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -48,7 +62,7 @@ class _CachedFunction(Generic[F]): class _CacheDescriptorBase: - def __init__(self, orig: _CachedFunction, num_args, cache_context=False): + def __init__(self, orig: Callable[..., Any], num_args, cache_context=False): self.orig = orig arg_spec = inspect.getfullargspec(orig) @@ -97,8 +111,107 @@ class _CacheDescriptorBase: self.add_cache_context = cache_context + self.cache_key_builder = get_cache_key_builder( + self.arg_names, self.arg_defaults + ) + + +class _LruCachedFunction(Generic[F]): + cache = None # type: LruCache[CacheKey, Any] + __call__ = None # type: F + + +def lru_cache( + max_entries: int = 1000, cache_context: bool = False, +) -> Callable[[F], _LruCachedFunction[F]]: + """A method decorator that applies a memoizing cache around the function. + + This is more-or-less a drop-in equivalent to functools.lru_cache, although note + that the signature is slightly different. + + The main differences with functools.lru_cache are: + (a) the size of the cache can be controlled via the cache_factor mechanism + (b) the wrapped function can request a "cache_context" which provides a + callback mechanism to indicate that the result is no longer valid + (c) prometheus metrics are exposed automatically. + + The function should take zero or more arguments, which are used as the key for the + cache. Single-argument functions use that argument as the cache key; otherwise the + arguments are built into a tuple. + + Cached functions can be "chained" (i.e. a cached function can call other cached + functions and get appropriately invalidated when they called caches are + invalidated) by adding a special "cache_context" argument to the function + and passing that as a kwarg to all caches called. For example: + + @lru_cache(cache_context=True) + def foo(self, key, cache_context): + r1 = self.bar1(key, on_invalidate=cache_context.invalidate) + r2 = self.bar2(key, on_invalidate=cache_context.invalidate) + return r1 + r2 + + The wrapped function also has a 'cache' property which offers direct access to the + underlying LruCache. 
+ """ + + def func(orig: F) -> _LruCachedFunction[F]: + desc = LruCacheDescriptor( + orig, max_entries=max_entries, cache_context=cache_context, + ) + return cast(_LruCachedFunction[F], desc) + + return func + + +class LruCacheDescriptor(_CacheDescriptorBase): + """Helper for @lru_cache""" + + class _Sentinel(enum.Enum): + sentinel = object() + + def __init__( + self, orig, max_entries: int = 1000, cache_context: bool = False, + ): + super().__init__(orig, num_args=None, cache_context=cache_context) + self.max_entries = max_entries + + def __get__(self, obj, owner): + cache = LruCache( + cache_name=self.orig.__name__, max_size=self.max_entries, + ) # type: LruCache[CacheKey, Any] + + get_cache_key = self.cache_key_builder + sentinel = LruCacheDescriptor._Sentinel.sentinel + + @functools.wraps(self.orig) + def _wrapped(*args, **kwargs): + invalidate_callback = kwargs.pop("on_invalidate", None) + callbacks = (invalidate_callback,) if invalidate_callback else () + + cache_key = get_cache_key(args, kwargs) -class CacheDescriptor(_CacheDescriptorBase): + ret = cache.get(cache_key, default=sentinel, callbacks=callbacks) + if ret != sentinel: + return ret + + # Add our own `cache_context` to argument list if the wrapped function + # has asked for one + if self.add_cache_context: + kwargs["cache_context"] = _CacheContext.get_instance(cache, cache_key) + + ret2 = self.orig(obj, *args, **kwargs) + cache.set(cache_key, ret2, callbacks=callbacks) + + return ret2 + + wrapped = cast(_CachedFunction, _wrapped) + wrapped.cache = cache + obj.__dict__[self.orig.__name__] = wrapped + + return wrapped + + +class DeferredCacheDescriptor(_CacheDescriptorBase): """ A method decorator that applies a memoizing cache around the function. This caches deferreds, rather than the results themselves. Deferreds that @@ -141,7 +254,6 @@ class CacheDescriptor(_CacheDescriptorBase): cache_context=False, iterable=False, ): - super().__init__(orig, num_args=num_args, cache_context=cache_context) self.max_entries = max_entries @@ -157,41 +269,7 @@ class CacheDescriptor(_CacheDescriptorBase): iterable=self.iterable, ) # type: DeferredCache[CacheKey, Any] - def get_cache_key_gen(args, kwargs): - """Given some args/kwargs return a generator that resolves into - the cache_key. - - We loop through each arg name, looking up if its in the `kwargs`, - otherwise using the next argument in `args`. If there are no more - args then we try looking the arg name up in the defaults - """ - pos = 0 - for nm in self.arg_names: - if nm in kwargs: - yield kwargs[nm] - elif pos < len(args): - yield args[pos] - pos += 1 - else: - yield self.arg_defaults[nm] - - # By default our cache key is a tuple, but if there is only one item - # then don't bother wrapping in a tuple. This is to save memory. 
- if self.num_args == 1: - nm = self.arg_names[0] - - def get_cache_key(args, kwargs): - if nm in kwargs: - return kwargs[nm] - elif len(args): - return args[0] - else: - return self.arg_defaults[nm] - - else: - - def get_cache_key(args, kwargs): - return tuple(get_cache_key_gen(args, kwargs)) + get_cache_key = self.cache_key_builder @functools.wraps(self.orig) def _wrapped(*args, **kwargs): @@ -223,7 +301,6 @@ class CacheDescriptor(_CacheDescriptorBase): wrapped.prefill = lambda key, val: cache.prefill(key[0], val) else: wrapped.invalidate = cache.invalidate - wrapped.invalidate_all = cache.invalidate_all wrapped.invalidate_many = cache.invalidate_many wrapped.prefill = cache.prefill @@ -236,7 +313,7 @@ class CacheDescriptor(_CacheDescriptorBase): return wrapped -class CacheListDescriptor(_CacheDescriptorBase): +class DeferredCacheListDescriptor(_CacheDescriptorBase): """Wraps an existing cache to support bulk fetching of keys. Given a list of keys it looks in the cache to find any hits, then passes @@ -382,11 +459,13 @@ class _CacheContext: on a lower level. """ + Cache = Union[DeferredCache, LruCache] + _cache_context_objects = ( WeakValueDictionary() - ) # type: WeakValueDictionary[Tuple[DeferredCache, CacheKey], _CacheContext] + ) # type: WeakValueDictionary[Tuple[_CacheContext.Cache, CacheKey], _CacheContext] - def __init__(self, cache, cache_key): # type: (DeferredCache, CacheKey) -> None + def __init__(self, cache: "_CacheContext.Cache", cache_key: CacheKey) -> None: self._cache = cache self._cache_key = cache_key @@ -396,8 +475,8 @@ class _CacheContext: @classmethod def get_instance( - cls, cache, cache_key - ): # type: (DeferredCache, CacheKey) -> _CacheContext + cls, cache: "_CacheContext.Cache", cache_key: CacheKey + ) -> "_CacheContext": """Returns an instance constructed with the given arguments. A new instance is only created if none already exists. @@ -418,7 +497,7 @@ def cached( cache_context: bool = False, iterable: bool = False, ) -> Callable[[F], _CachedFunction[F]]: - func = lambda orig: CacheDescriptor( + func = lambda orig: DeferredCacheDescriptor( orig, max_entries=max_entries, num_args=num_args, @@ -460,7 +539,7 @@ def cachedList( def batch_do_something(self, first_arg, second_args): ... """ - func = lambda orig: CacheListDescriptor( + func = lambda orig: DeferredCacheListDescriptor( orig, cached_method_name=cached_method_name, list_name=list_name, @@ -468,3 +547,65 @@ def cachedList( ) return cast(Callable[[F], _CachedFunction[F]], func) + + +def get_cache_key_builder( + param_names: Sequence[str], param_defaults: Mapping[str, Any] +) -> Callable[[Sequence[Any], Mapping[str, Any]], CacheKey]: + """Construct a function which will build cache keys suitable for a cached function + + Args: + param_names: list of formal parameter names for the cached function + param_defaults: a mapping from parameter name to default value for that param + + Returns: + A function which will take an (args, kwargs) pair and return a cache key + """ + + # By default our cache key is a tuple, but if there is only one item + # then don't bother wrapping in a tuple. This is to save memory. 
+ + if len(param_names) == 1: + nm = param_names[0] + + def get_cache_key(args: Sequence[Any], kwargs: Mapping[str, Any]) -> CacheKey: + if nm in kwargs: + return kwargs[nm] + elif len(args): + return args[0] + else: + return param_defaults[nm] + + else: + + def get_cache_key(args: Sequence[Any], kwargs: Mapping[str, Any]) -> CacheKey: + return tuple(_get_cache_key_gen(param_names, param_defaults, args, kwargs)) + + return get_cache_key + + +def _get_cache_key_gen( + param_names: Iterable[str], + param_defaults: Mapping[str, Any], + args: Sequence[Any], + kwargs: Mapping[str, Any], +) -> Iterable[Any]: + """Given some args/kwargs return a generator that resolves into + the cache_key. + + This is essentially the same operation as `inspect.getcallargs`, but optimised so + that we don't need to inspect the target function for each call. + """ + + # We loop through each arg name, looking up if its in the `kwargs`, + # otherwise using the next argument in `args`. If there are no more + # args then we try looking the arg name up in the defaults. + pos = 0 + for nm in param_names: + if nm in kwargs: + yield kwargs[nm] + elif pos < len(args): + yield args[pos] + pos += 1 + else: + yield param_defaults[nm] diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py new file mode 100644
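The new `@lru_cache` decorator added to `descriptors.py` memoises plain synchronous methods (unlike `@cached`, which caches deferreds), with the cache key built from the method's arguments. A small usage sketch, modelled on the `LruCacheDecoratorTestCase` later in this diff:

```python
from synapse.util.caches.descriptors import lru_cache


class Calculator:
    def __init__(self):
        self.calls = 0

    @lru_cache(max_entries=10)
    def square(self, x: int) -> int:
        # The body only runs on a cache miss; repeated calls with the same
        # argument are served from the per-descriptor LruCache.
        self.calls += 1
        return x * x


calc = Calculator()
assert calc.square(3) == 9
assert calc.square(3) == 9
assert calc.calls == 1  # the second call hit the cache
```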
index 0000000000..77c261dbf7 --- /dev/null +++ b/tests/replication/test_multi_media_repo.py
@@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import os +from binascii import unhexlify +from typing import Tuple + +from twisted.internet.protocol import Factory +from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.web.http import HTTPChannel +from twisted.web.server import Request + +from synapse.rest import admin +from synapse.rest.client.v1 import login +from synapse.server import HomeServer + +from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file +from tests.replication._base import BaseMultiWorkerStreamTestCase +from tests.server import FakeChannel, FakeTransport + +logger = logging.getLogger(__name__) + +test_server_connection_factory = None + + +class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase): + """Checks running multiple media repos work correctly. + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.user_id = self.register_user("user", "pass") + self.access_token = self.login("user", "pass") + + self.reactor.lookups["example.com"] = "127.0.0.2" + + def default_config(self): + conf = super().default_config() + conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] + return conf + + def _get_media_req( + self, hs: HomeServer, target: str, media_id: str + ) -> Tuple[FakeChannel, Request]: + """Request some remote media from the given HS by calling the download + API. + + This then triggers an outbound request from the HS to the target. + + Returns: + The channel for the *client* request and the *outbound* request for + the media which the caller should respond to. + """ + + request, channel = self.make_request( + "GET", + "/{}/{}".format(target, media_id), + shorthand=False, + access_token=self.access_token, + ) + request.render(hs.get_media_repository_resource().children[b"download"]) + self.pump() + + clients = self.reactor.tcpClients + self.assertGreaterEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop() + + # build the test server + server_tls_protocol = _build_test_server(get_connection_factory()) + + # now, tell the client protocol factory to build the client protocol (it will be a + # _WrappingProtocol, around a TLSMemoryBIOProtocol, around an + # HTTP11ClientProtocol) and wire the output of said protocol up to the server via + # a FakeTransport. + # + # Normally this would be done by the TCP socket code in Twisted, but we are + # stubbing that out here. + client_protocol = client_factory.buildProtocol(None) + client_protocol.makeConnection( + FakeTransport(server_tls_protocol, self.reactor, client_protocol) + ) + + # tell the server tls protocol to send its stuff back to the client, too + server_tls_protocol.makeConnection( + FakeTransport(client_protocol, self.reactor, server_tls_protocol) + ) + + # fish the test server back out of the server-side TLS protocol. 
+ http_server = server_tls_protocol.wrappedProtocol + + # give the reactor a pump to get the TLS juices flowing. + self.reactor.pump((0.1,)) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + + self.assertEqual(request.method, b"GET") + self.assertEqual( + request.path, + "/_matrix/media/r0/download/{}/{}".format(target, media_id).encode("utf-8"), + ) + self.assertEqual( + request.requestHeaders.getRawHeaders(b"host"), [target.encode("utf-8")] + ) + + return channel, request + + def test_basic(self): + """Test basic fetching of remote media from a single worker. + """ + hs1 = self.make_worker_hs("synapse.app.generic_worker") + + channel, request = self._get_media_req(hs1, "example.com:443", "ABC123") + + request.setResponseCode(200) + request.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"]) + request.write(b"Hello!") + request.finish() + + self.pump(0.1) + + self.assertEqual(channel.code, 200) + self.assertEqual(channel.result["body"], b"Hello!") + + def test_download_simple_file_race(self): + """Test that fetching remote media from two different processes at the + same time works. + """ + hs1 = self.make_worker_hs("synapse.app.generic_worker") + hs2 = self.make_worker_hs("synapse.app.generic_worker") + + start_count = self._count_remote_media() + + # Make two requests without responding to the outbound media requests. + channel1, request1 = self._get_media_req(hs1, "example.com:443", "ABC123") + channel2, request2 = self._get_media_req(hs2, "example.com:443", "ABC123") + + # Respond to the first outbound media request and check that the client + # request is successful + request1.setResponseCode(200) + request1.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"]) + request1.write(b"Hello!") + request1.finish() + + self.pump(0.1) + + self.assertEqual(channel1.code, 200, channel1.result["body"]) + self.assertEqual(channel1.result["body"], b"Hello!") + + # Now respond to the second with the same content. + request2.setResponseCode(200) + request2.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"]) + request2.write(b"Hello!") + request2.finish() + + self.pump(0.1) + + self.assertEqual(channel2.code, 200, channel2.result["body"]) + self.assertEqual(channel2.result["body"], b"Hello!") + + # We expect only one new file to have been persisted. + self.assertEqual(start_count + 1, self._count_remote_media()) + + def test_download_image_race(self): + """Test that fetching remote *images* from two different processes at + the same time works. + + This checks that races generating thumbnails are handled correctly. 
+ """ + hs1 = self.make_worker_hs("synapse.app.generic_worker") + hs2 = self.make_worker_hs("synapse.app.generic_worker") + + start_count = self._count_remote_thumbnails() + + channel1, request1 = self._get_media_req(hs1, "example.com:443", "PIC1") + channel2, request2 = self._get_media_req(hs2, "example.com:443", "PIC1") + + png_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + request1.setResponseCode(200) + request1.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"]) + request1.write(png_data) + request1.finish() + + self.pump(0.1) + + self.assertEqual(channel1.code, 200, channel1.result["body"]) + self.assertEqual(channel1.result["body"], png_data) + + request2.setResponseCode(200) + request2.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"]) + request2.write(png_data) + request2.finish() + + self.pump(0.1) + + self.assertEqual(channel2.code, 200, channel2.result["body"]) + self.assertEqual(channel2.result["body"], png_data) + + # We expect only three new thumbnails to have been persisted. + self.assertEqual(start_count + 3, self._count_remote_thumbnails()) + + def _count_remote_media(self) -> int: + """Count the number of files in our remote media directory. + """ + path = os.path.join( + self.hs.get_media_repository().primary_base_path, "remote_content" + ) + return sum(len(files) for _, _, files in os.walk(path)) + + def _count_remote_thumbnails(self) -> int: + """Count the number of files in our remote thumbnails directory. + """ + path = os.path.join( + self.hs.get_media_repository().primary_base_path, "remote_thumbnail" + ) + return sum(len(files) for _, _, files in os.walk(path)) + + +def get_connection_factory(): + # this needs to happen once, but not until we are ready to run the first test + global test_server_connection_factory + if test_server_connection_factory is None: + test_server_connection_factory = TestServerTLSConnectionFactory( + sanlist=[b"DNS:example.com"] + ) + return test_server_connection_factory + + +def _build_test_server(connection_creator): + """Construct a test server + + This builds an HTTP channel, wrapped with a TLSMemoryBIOProtocol + + Args: + connection_creator (IOpenSSLServerConnectionCreator): thing to build + SSL connections + sanlist (list[bytes]): list of the SAN entries for the cert returned + by the server + + Returns: + TLSMemoryBIOProtocol + """ + server_factory = Factory.forProtocol(HTTPChannel) + # Request.finish expects the factory to have a 'log' method. + server_factory.log = _log_request + + server_tls_factory = TLSMemoryBIOFactory( + connection_creator, isClient=False, wrappedFactory=server_factory + ) + + return server_tls_factory.buildProtocol(None) + + +def _log_request(request): + """Implements Factory.log, which is expected by Request.finish""" + logger.info("Completed request %s", request) diff --git a/tests/server.py b/tests/server.py
index b97003fa5a..3dd2cfc072 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -46,7 +46,7 @@ class FakeChannel:
     site = attr.ib(type=Site)
     _reactor = attr.ib()

-    result = attr.ib(default=attr.Factory(dict))
+    result = attr.ib(type=dict, default=attr.Factory(dict))
     _producer = None

     @property
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index a298cc0fd3..d232b72264 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py
@@ -17,8 +17,10 @@ """ Utilities for running the unit tests """ +import sys +import warnings from asyncio import Future -from typing import Any, Awaitable, TypeVar +from typing import Any, Awaitable, Callable, TypeVar TV = TypeVar("TV") @@ -48,3 +50,33 @@ def make_awaitable(result: Any) -> Awaitable[Any]: future = Future() # type: ignore future.set_result(result) return future + + +def setup_awaitable_errors() -> Callable[[], None]: + """ + Convert warnings from a non-awaited coroutines into errors. + """ + warnings.simplefilter("error", RuntimeWarning) + + # unraisablehook was added in Python 3.8. + if not hasattr(sys, "unraisablehook"): + return lambda: None + + # State shared between unraisablehook and check_for_unraisable_exceptions. + unraisable_exceptions = [] + orig_unraisablehook = sys.unraisablehook # type: ignore + + def unraisablehook(unraisable): + unraisable_exceptions.append(unraisable.exc_value) + + def cleanup(): + """ + A method to be used as a clean-up that fails a test-case if there are any new unraisable exceptions. + """ + sys.unraisablehook = orig_unraisablehook # type: ignore + if unraisable_exceptions: + raise unraisable_exceptions.pop() + + sys.unraisablehook = unraisablehook # type: ignore + + return cleanup diff --git a/tests/unittest.py b/tests/unittest.py
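The `setup_awaitable_errors` helper above pairs a warnings filter with `sys.unraisablehook` because the "coroutine ... was never awaited" RuntimeWarning is raised while the coroutine object is being finalised: promoting it to an error does not make it propagate to the caller, it is delivered to the unraisable hook instead. A standalone sketch of that mechanism (Python 3.8+; this is not the test-suite helper itself):

```python
import gc
import sys
import warnings


async def do_something():
    return 42


# Promote "coroutine ... was never awaited" warnings to errors.
warnings.simplefilter("error", RuntimeWarning)

# The resulting exception is raised during finalisation of the coroutine,
# so it reaches sys.unraisablehook rather than the calling code; capture it
# there, as the helper above does.
captured = []
sys.unraisablehook = lambda unraisable: captured.append(unraisable.exc_value)

do_something()  # oops: missing `await`
gc.collect()

assert captured and isinstance(captured[0], RuntimeWarning)
print("caught:", captured[0])
```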
index 257f465897..08cf9b10c5 100644 --- a/tests/unittest.py +++ b/tests/unittest.py
@@ -54,7 +54,7 @@ from tests.server import ( render, setup_test_homeserver, ) -from tests.test_utils import event_injection +from tests.test_utils import event_injection, setup_awaitable_errors from tests.test_utils.logging_setup import setup_logging from tests.utils import default_config, setupdb @@ -119,6 +119,10 @@ class TestCase(unittest.TestCase): logging.getLogger().setLevel(level) + # Trial messes with the warnings configuration, thus this has to be + # done in the context of an individual TestCase. + self.addCleanup(setup_awaitable_errors()) + return orig() @around(self) diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 2ad08f541b..cf1e3203a4 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py
@@ -29,13 +29,46 @@ from synapse.logging.context import ( make_deferred_yieldable, ) from synapse.util.caches import descriptors -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, lru_cache from tests import unittest +from tests.test_utils import get_awaitable_result logger = logging.getLogger(__name__) +class LruCacheDecoratorTestCase(unittest.TestCase): + def test_base(self): + class Cls: + def __init__(self): + self.mock = mock.Mock() + + @lru_cache() + def fn(self, arg1, arg2): + return self.mock(arg1, arg2) + + obj = Cls() + obj.mock.return_value = "fish" + r = obj.fn(1, 2) + self.assertEqual(r, "fish") + obj.mock.assert_called_once_with(1, 2) + obj.mock.reset_mock() + + # a call with different params should call the mock again + obj.mock.return_value = "chips" + r = obj.fn(1, 3) + self.assertEqual(r, "chips") + obj.mock.assert_called_once_with(1, 3) + obj.mock.reset_mock() + + # the two values should now be cached + r = obj.fn(1, 2) + self.assertEqual(r, "fish") + r = obj.fn(1, 3) + self.assertEqual(r, "chips") + obj.mock.assert_not_called() + + def run_on_reactor(): d = defer.Deferred() reactor.callLater(0, d.callback, 0) @@ -362,6 +395,31 @@ class DescriptorTestCase(unittest.TestCase): d = obj.fn(1) self.failureResultOf(d, SynapseError) + def test_invalidate_cascade(self): + """Invalidations should cascade up through cache contexts""" + + class Cls: + @cached(cache_context=True) + async def func1(self, key, cache_context): + return await self.func2(key, on_invalidate=cache_context.invalidate) + + @cached(cache_context=True) + async def func2(self, key, cache_context): + return self.func3(key, on_invalidate=cache_context.invalidate) + + @lru_cache(cache_context=True) + def func3(self, key, cache_context): + self.invalidate = cache_context.invalidate + return 42 + + obj = Cls() + + top_invalidate = mock.Mock() + r = get_awaitable_result(obj.func1("k1", on_invalidate=top_invalidate)) + self.assertEqual(r, 42) + obj.invalidate() + top_invalidate.assert_called_once() + class CacheDecoratorTestCase(unittest.HomeserverTestCase): """More tests for @cached