From d0d8a22c13427cce341dbb7ae1d92d2c0ae709c3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 28 Oct 2019 13:33:04 +0000
Subject: Quick fix to ensure cache descriptors always return deferreds

---
 tests/util/caches/test_descriptors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tests/util')

diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 5713870f48..f907903511 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -325,9 +325,9 @@ class DescriptorTestCase(unittest.TestCase):
         self.assertEqual(len(obj.fn.cache.cache), 3)
 
         r = obj.fn(1, 2)
-        self.assertEqual(r, ["spam", "eggs"])
+        self.assertEqual(r.result, ["spam", "eggs"])
         r = obj.fn(1, 3)
-        self.assertEqual(r, ["chips"])
+        self.assertEqual(r.result, ["chips"])
         obj.mock.assert_not_called()
 
     def test_cache_iterable_with_sync_exception(self):
-- 
cgit 1.4.1


From 326b3dace77aeb36e516ea9b04ba1baa171bcb47 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 30 Oct 2019 11:35:46 +0000
Subject: Make ObservableDeferred.observe() always return deferred.

This makes it easier to use in an async/await world.

Also fixes a bug where cache descriptors would occaisonally return a raw
value rather than a deferred.
---
 synapse/util/async_helpers.py         | 7 ++-----
 tests/storage/test__base.py           | 2 +-
 tests/util/caches/test_descriptors.py | 4 ++--
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'tests/util')

diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 7659eaeb42..fd75ba27ad 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -86,11 +86,8 @@ class ObservableDeferred(object):
 
         deferred.addCallbacks(callback, errback)
 
-    def observe(self):
+    def observe(self) -> defer.Deferred:
         """Observe the underlying deferred.
-
-        Can return either a deferred if the underlying deferred is still pending
-        (or has failed), or the actual value. Callers may need to use maybeDeferred.
         """
         if not self._result:
             d = defer.Deferred()
@@ -105,7 +102,7 @@ class ObservableDeferred(object):
             return d
         else:
             success, res = self._result
-            return res if success else defer.fail(res)
+            return defer.succeed(res) if success else defer.fail(res)
 
     def observers(self):
         return self._observers
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index dd49a14524..9b81b536f5 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -197,7 +197,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
 
         a.func.prefill(("foo",), ObservableDeferred(d))
 
-        self.assertEquals(a.func("foo"), d.result)
+        self.assertEquals(a.func("foo").result, d.result)
         self.assertEquals(callcount[0], 0)
 
     @defer.inlineCallbacks
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index f907903511..39e360fe24 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -310,14 +310,14 @@ class DescriptorTestCase(unittest.TestCase):
 
         obj.mock.return_value = ["spam", "eggs"]
         r = obj.fn(1, 2)
-        self.assertEqual(r, ["spam", "eggs"])
+        self.assertEqual(r.result, ["spam", "eggs"])
         obj.mock.assert_called_once_with(1, 2)
         obj.mock.reset_mock()
 
         # a call with different params should call the mock again
         obj.mock.return_value = ["chips"]
         r = obj.fn(1, 3)
-        self.assertEqual(r, ["chips"])
+        self.assertEqual(r.result, ["chips"])
         obj.mock.assert_called_once_with(1, 3)
         obj.mock.reset_mock()
 
-- 
cgit 1.4.1


From f166a8d1f52f1fda1a64d8cd50f17275d63c8843 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Dec 2019 13:42:49 +0000
Subject: Remove SnapshotCache in favour of ResponseCache

---
 synapse/handlers/initial_sync.py      | 19 +++----
 synapse/util/caches/snapshot_cache.py | 94 -----------------------------------
 tests/util/test_snapshot_cache.py     | 63 -----------------------
 3 files changed, 8 insertions(+), 168 deletions(-)
 delete mode 100644 synapse/util/caches/snapshot_cache.py
 delete mode 100644 tests/util/test_snapshot_cache.py

(limited to 'tests/util')

diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 81dce96f4b..73c110a92b 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -26,7 +26,7 @@ from synapse.streams.config import PaginationConfig
 from synapse.types import StreamToken, UserID
 from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import concurrently_execute
-from synapse.util.caches.snapshot_cache import SnapshotCache
+from synapse.util.caches.response_cache import ResponseCache
 from synapse.visibility import filter_events_for_client
 
 from ._base import BaseHandler
@@ -41,7 +41,7 @@ class InitialSyncHandler(BaseHandler):
         self.state = hs.get_state_handler()
         self.clock = hs.get_clock()
         self.validator = EventValidator()
-        self.snapshot_cache = SnapshotCache()
+        self.snapshot_cache = ResponseCache(hs, "initial_sync_cache")
         self._event_serializer = hs.get_event_client_serializer()
         self.storage = hs.get_storage()
         self.state_store = self.storage.state
@@ -79,17 +79,14 @@ class InitialSyncHandler(BaseHandler):
             as_client_event,
             include_archived,
         )
-        now_ms = self.clock.time_msec()
-        result = self.snapshot_cache.get(now_ms, key)
-        if result is not None:
-            return result
 
-        return self.snapshot_cache.set(
-            now_ms,
+        return self.snapshot_cache.wrap(
             key,
-            self._snapshot_all_rooms(
-                user_id, pagin_config, as_client_event, include_archived
-            ),
+            self._snapshot_all_rooms,
+            user_id,
+            pagin_config,
+            as_client_event,
+            include_archived,
         )
 
     @defer.inlineCallbacks
diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py
deleted file mode 100644
index 8318db8d2c..0000000000
--- a/synapse/util/caches/snapshot_cache.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.util.async_helpers import ObservableDeferred
-
-
-class SnapshotCache(object):
-    """Cache for snapshots like the response of /initialSync.
-    The response of initialSync only has to be a recent snapshot of the
-    server state. It shouldn't matter to clients if it is a few minutes out
-    of date.
-
-    This caches a deferred response. Until the deferred completes it will be
-    returned from the cache. This means that if the client retries the request
-    while the response is still being computed, that original response will be
-    used rather than trying to compute a new response.
-
-    Once the deferred completes it will removed from the cache after 5 minutes.
-    We delay removing it from the cache because a client retrying its request
-    could race with us finishing computing the response.
-
-    Rather than tracking precisely how long something has been in the cache we
-    keep two generations of completed responses. Every 5 minutes discard the
-    old generation, move the new generation to the old generation, and set the
-    new generation to be empty. This means that a result will be in the cache
-    somewhere between 5 and 10 minutes.
-    """
-
-    DURATION_MS = 5 * 60 * 1000  # Cache results for 5 minutes.
-
-    def __init__(self):
-        self.pending_result_cache = {}  # Request that haven't finished yet.
-        self.prev_result_cache = {}  # The older requests that have finished.
-        self.next_result_cache = {}  # The newer requests that have finished.
-        self.time_last_rotated_ms = 0
-
-    def rotate(self, time_now_ms):
-        # Rotate once if the cache duration has passed since the last rotation.
-        if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS:
-            self.prev_result_cache = self.next_result_cache
-            self.next_result_cache = {}
-            self.time_last_rotated_ms += self.DURATION_MS
-
-        # Rotate again if the cache duration has passed twice since the last
-        # rotation.
-        if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS:
-            self.prev_result_cache = self.next_result_cache
-            self.next_result_cache = {}
-            self.time_last_rotated_ms = time_now_ms
-
-    def get(self, time_now_ms, key):
-        self.rotate(time_now_ms)
-        # This cache is intended to deduplicate requests, so we expect it to be
-        # missed most of the time. So we just lookup the key in all of the
-        # dictionaries rather than trying to short circuit the lookup if the
-        # key is found.
-        result = self.prev_result_cache.get(key)
-        result = self.next_result_cache.get(key, result)
-        result = self.pending_result_cache.get(key, result)
-        if result is not None:
-            return result.observe()
-        else:
-            return None
-
-    def set(self, time_now_ms, key, deferred):
-        self.rotate(time_now_ms)
-
-        result = ObservableDeferred(deferred)
-
-        self.pending_result_cache[key] = result
-
-        def shuffle_along(r):
-            # When the deferred completes we shuffle it along to the first
-            # generation of the result cache. So that it will eventually
-            # expire from the rotation of that cache.
-            self.next_result_cache[key] = result
-            self.pending_result_cache.pop(key, None)
-            return r
-
-        result.addBoth(shuffle_along)
-
-        return result.observe()
diff --git a/tests/util/test_snapshot_cache.py b/tests/util/test_snapshot_cache.py
deleted file mode 100644
index 1a44f72425..0000000000
--- a/tests/util/test_snapshot_cache.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from twisted.internet.defer import Deferred
-
-from synapse.util.caches.snapshot_cache import SnapshotCache
-
-from .. import unittest
-
-
-class SnapshotCacheTestCase(unittest.TestCase):
-    def setUp(self):
-        self.cache = SnapshotCache()
-        self.cache.DURATION_MS = 1
-
-    def test_get_set(self):
-        # Check that getting a missing key returns None
-        self.assertEquals(self.cache.get(0, "key"), None)
-
-        # Check that setting a key with a deferred returns
-        # a deferred that resolves when the initial deferred does
-        d = Deferred()
-        set_result = self.cache.set(0, "key", d)
-        self.assertIsNotNone(set_result)
-        self.assertFalse(set_result.called)
-
-        # Check that getting the key before the deferred has resolved
-        # returns a deferred that resolves when the initial deferred does.
-        get_result_at_10 = self.cache.get(10, "key")
-        self.assertIsNotNone(get_result_at_10)
-        self.assertFalse(get_result_at_10.called)
-
-        # Check that the returned deferreds resolve when the initial deferred
-        # does.
-        d.callback("v")
-        self.assertTrue(set_result.called)
-        self.assertTrue(get_result_at_10.called)
-
-        # Check that getting the key after the deferred has resolved
-        # before the cache expires returns a resolved deferred.
-        get_result_at_11 = self.cache.get(11, "key")
-        self.assertIsNotNone(get_result_at_11)
-        if isinstance(get_result_at_11, Deferred):
-            # The cache may return the actual result rather than a deferred
-            self.assertTrue(get_result_at_11.called)
-
-        # Check that getting the key after the deferred has resolved
-        # after the cache expires returns None
-        get_result_at_12 = self.cache.get(12, "key")
-        self.assertIsNone(get_result_at_12)
-- 
cgit 1.4.1


From 9a2223d4c8c2a46f7ab072597bdba2614bc3e6ea Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 10 Dec 2019 11:22:12 +0000
Subject: Fix make_deferred_yieldable to work with coroutines

---
 synapse/logging/context.py    |  9 ++++++++-
 tests/util/test_logcontext.py | 24 ++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'tests/util')

diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 2c1fb9ddac..9f484ce59e 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -23,6 +23,7 @@ them.
 See doc/log_contexts.rst for details on how this works.
 """
 
+import inspect
 import logging
 import threading
 import types
@@ -612,7 +613,8 @@ def run_in_background(f, *args, **kwargs):
 
 
 def make_deferred_yieldable(deferred):
-    """Given a deferred, make it follow the Synapse logcontext rules:
+    """Given a deferred (or coroutine), make it follow the Synapse logcontext
+    rules:
 
     If the deferred has completed (or is not actually a Deferred), essentially
     does nothing (just returns another completed deferred with the
@@ -624,6 +626,11 @@ def make_deferred_yieldable(deferred):
 
     (This is more-or-less the opposite operation to run_in_background.)
     """
+    if inspect.isawaitable(deferred):
+        # If we're given a coroutine we need to convert it to a deferred so that
+        # we can attach callbacks (and not immediately return).
+        deferred = defer.ensureDeferred(deferred)
+
     if not isinstance(deferred, defer.Deferred):
         return deferred
 
diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py
index 8b8455c8b7..281b32c4b8 100644
--- a/tests/util/test_logcontext.py
+++ b/tests/util/test_logcontext.py
@@ -179,6 +179,30 @@ class LoggingContextTestCase(unittest.TestCase):
             nested_context = nested_logging_context(suffix="bar")
             self.assertEqual(nested_context.request, "foo-bar")
 
+    @defer.inlineCallbacks
+    def test_make_deferred_yieldable_with_await(self):
+        # an async function which retuns an incomplete coroutine, but doesn't
+        # follow the synapse rules.
+
+        async def blocking_function():
+            d = defer.Deferred()
+            reactor.callLater(0, d.callback, None)
+            await d
+
+        sentinel_context = LoggingContext.current_context()
+
+        with LoggingContext() as context_one:
+            context_one.request = "one"
+
+            d1 = make_deferred_yieldable(blocking_function())
+            # make sure that the context was reset by make_deferred_yieldable
+            self.assertIs(LoggingContext.current_context(), sentinel_context)
+
+            yield d1
+
+            # now it should be restored
+            self._check_test_key("one")
+
 
 # a function which returns a deferred which has been "called", but
 # which had a function which returned another incomplete deferred on
-- 
cgit 1.4.1


From acc7820574426cf27673d941b1b0362272113351 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 16 Jan 2020 22:26:34 +0000
Subject: Log saml assertions rather than the whole response

... since the whole response is huge.

We even need to break up the assertions, since kibana otherwise truncates them.
---
 synapse/handlers/saml_handler.py | 13 ++++++++++-
 synapse/util/iterutils.py        | 13 +++++++++++
 tests/util/test_itertools.py     | 47 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 tests/util/test_itertools.py

(limited to 'tests/util')

diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index 107f97032b..32638671c9 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -32,6 +32,7 @@ from synapse.types import (
     mxid_localpart_allowed_characters,
 )
 from synapse.util.async_helpers import Linearizer
+from synapse.util.iterutils import chunk_seq
 
 logger = logging.getLogger(__name__)
 
@@ -132,7 +133,17 @@ class SamlHandler:
             logger.warning("SAML2 response was not signed")
             raise SynapseError(400, "SAML2 response was not signed")
 
-        logger.info("SAML2 response: %s", saml2_auth.origxml)
+        logger.debug("SAML2 response: %s", saml2_auth.origxml)
+        for assertion in saml2_auth.assertions:
+            # kibana limits the length of a log field, whereas this is all rather
+            # useful, so split it up.
+            count = 0
+            for part in chunk_seq(str(assertion), 10000):
+                logger.info(
+                    "SAML2 assertion: %s%s", "(%i)..." % (count,) if count else "", part
+                )
+                count += 1
+
         logger.info("SAML2 mapped attributes: %s", saml2_auth.ava)
 
         try:
diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py
index c10016fbc5..06faeebe7f 100644
--- a/synapse/util/iterutils.py
+++ b/synapse/util/iterutils.py
@@ -33,3 +33,16 @@ def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T]]:
     sourceiter = iter(iterable)
     # call islice until it returns an empty tuple
     return iter(lambda: tuple(islice(sourceiter, size)), ())
+
+
+ISeq = TypeVar("ISeq", bound=Sequence, covariant=True)
+
+
+def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]:
+    """Split the given sequence into chunks of the given size
+
+    The last chunk may be shorter than the given size.
+
+    If the input is empty, no chunks are returned.
+    """
+    return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen))
diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py
new file mode 100644
index 0000000000..0ab0a91483
--- /dev/null
+++ b/tests/util/test_itertools.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.util.iterutils import chunk_seq
+
+from tests.unittest import TestCase
+
+
+class ChunkSeqTests(TestCase):
+    def test_short_seq(self):
+        parts = chunk_seq("123", 8)
+
+        self.assertEqual(
+            list(parts), ["123"],
+        )
+
+    def test_long_seq(self):
+        parts = chunk_seq("abcdefghijklmnop", 8)
+
+        self.assertEqual(
+            list(parts), ["abcdefgh", "ijklmnop"],
+        )
+
+    def test_uneven_parts(self):
+        parts = chunk_seq("abcdefghijklmnop", 5)
+
+        self.assertEqual(
+            list(parts), ["abcde", "fghij", "klmno", "p"],
+        )
+
+    def test_empty_input(self):
+        parts = chunk_seq([], 5)
+
+        self.assertEqual(
+            list(parts), [],
+        )
-- 
cgit 1.4.1


From 9f7aaf90b5ef76416852f35201a851d45eccc0a1 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 24 Jan 2020 14:28:40 +0000
Subject: Validate client_secret parameter (#6767)

---
 changelog.d/6767.bugfix                  |  1 +
 synapse/handlers/identity.py             |  4 ++-
 synapse/rest/client/v2_alpha/account.py  | 23 ++++++++++----
 synapse/rest/client/v2_alpha/register.py |  3 ++
 synapse/util/stringutils.py              | 17 +++++++++++
 tests/util/test_stringutils.py           | 51 ++++++++++++++++++++++++++++++++
 6 files changed, 93 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/6767.bugfix
 create mode 100644 tests/util/test_stringutils.py

(limited to 'tests/util')

diff --git a/changelog.d/6767.bugfix b/changelog.d/6767.bugfix
new file mode 100644
index 0000000000..63c7c63315
--- /dev/null
+++ b/changelog.d/6767.bugfix
@@ -0,0 +1 @@
+Validate `client_secret` parameter using the regex provided by the Client-Server API, temporarily allowing `:` characters for older clients. The `:` character will be removed in a future release.
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 000fbf090f..23f07832e7 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -38,7 +38,7 @@ from synapse.api.errors import (
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.http.client import SimpleHttpClient
 from synapse.util.hash import sha256_and_url_safe_base64
-from synapse.util.stringutils import random_string
+from synapse.util.stringutils import assert_valid_client_secret, random_string
 
 from ._base import BaseHandler
 
@@ -84,6 +84,8 @@ class IdentityHandler(BaseHandler):
             raise SynapseError(
                 400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM
             )
+        assert_valid_client_secret(client_secret)
+
         session_id = creds.get("sid")
         if not session_id:
             raise SynapseError(
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index fc240f5cf8..dc837d6c75 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -30,6 +30,7 @@ from synapse.http.servlet import (
 )
 from synapse.push.mailer import Mailer, load_jinja2_templates
 from synapse.util.msisdn import phone_number_to_msisdn
+from synapse.util.stringutils import assert_valid_client_secret
 from synapse.util.threepids import check_3pid_allowed
 
 from ._base import client_patterns, interactive_auth_handler
@@ -81,6 +82,8 @@ class EmailPasswordRequestTokenRestServlet(RestServlet):
 
         # Extract params from body
         client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
+
         email = body["email"]
         send_attempt = body["send_attempt"]
         next_link = body.get("next_link")  # Optional param
@@ -166,8 +169,9 @@ class PasswordResetSubmitTokenServlet(RestServlet):
             )
 
         sid = parse_string(request, "sid", required=True)
-        client_secret = parse_string(request, "client_secret", required=True)
         token = parse_string(request, "token", required=True)
+        client_secret = parse_string(request, "client_secret", required=True)
+        assert_valid_client_secret(client_secret)
 
         # Attempt to validate a 3PID session
         try:
@@ -353,6 +357,8 @@ class EmailThreepidRequestTokenRestServlet(RestServlet):
         body = parse_json_object_from_request(request)
         assert_params_in_dict(body, ["client_secret", "email", "send_attempt"])
         client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
+
         email = body["email"]
         send_attempt = body["send_attempt"]
         next_link = body.get("next_link")  # Optional param
@@ -413,6 +419,8 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
             body, ["client_secret", "country", "phone_number", "send_attempt"]
         )
         client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
+
         country = body["country"]
         phone_number = body["phone_number"]
         send_attempt = body["send_attempt"]
@@ -493,8 +501,9 @@ class AddThreepidEmailSubmitTokenServlet(RestServlet):
             )
 
         sid = parse_string(request, "sid", required=True)
-        client_secret = parse_string(request, "client_secret", required=True)
         token = parse_string(request, "token", required=True)
+        client_secret = parse_string(request, "client_secret", required=True)
+        assert_valid_client_secret(client_secret)
 
         # Attempt to validate a 3PID session
         try:
@@ -559,6 +568,7 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet):
 
         body = parse_json_object_from_request(request)
         assert_params_in_dict(body, ["client_secret", "sid", "token"])
+        assert_valid_client_secret(body["client_secret"])
 
         # Proxy submit_token request to msisdn threepid delegate
         response = await self.identity_handler.proxy_msisdn_submit_token(
@@ -600,8 +610,9 @@ class ThreepidRestServlet(RestServlet):
             )
         assert_params_in_dict(threepid_creds, ["client_secret", "sid"])
 
-        client_secret = threepid_creds["client_secret"]
         sid = threepid_creds["sid"]
+        client_secret = threepid_creds["client_secret"]
+        assert_valid_client_secret(client_secret)
 
         validation_session = await self.identity_handler.validate_threepid_session(
             client_secret, sid
@@ -637,8 +648,9 @@ class ThreepidAddRestServlet(RestServlet):
         body = parse_json_object_from_request(request)
 
         assert_params_in_dict(body, ["client_secret", "sid"])
-        client_secret = body["client_secret"]
         sid = body["sid"]
+        client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
 
         await self.auth_handler.validate_user_via_ui_auth(
             requester, body, self.hs.get_ip_from_request(request)
@@ -676,8 +688,9 @@ class ThreepidBindRestServlet(RestServlet):
         assert_params_in_dict(body, ["id_server", "sid", "client_secret"])
         id_server = body["id_server"]
         sid = body["sid"]
-        client_secret = body["client_secret"]
         id_access_token = body.get("id_access_token")  # optional
+        client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
 
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 1bda9aec7e..a09189b1b4 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -49,6 +49,7 @@ from synapse.http.servlet import (
 from synapse.push.mailer import load_jinja2_templates
 from synapse.util.msisdn import phone_number_to_msisdn
 from synapse.util.ratelimitutils import FederationRateLimiter
+from synapse.util.stringutils import assert_valid_client_secret
 from synapse.util.threepids import check_3pid_allowed
 
 from ._base import client_patterns, interactive_auth_handler
@@ -116,6 +117,8 @@ class EmailRegisterRequestTokenRestServlet(RestServlet):
 
         # Extract params from body
         client_secret = body["client_secret"]
+        assert_valid_client_secret(client_secret)
+
         email = body["email"]
         send_attempt = body["send_attempt"]
         next_link = body.get("next_link")  # Optional param
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 982c6d81ca..2c0dcb5208 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,14 +15,22 @@
 # limitations under the License.
 
 import random
+import re
 import string
 
 import six
 from six import PY2, PY3
 from six.moves import range
 
+from synapse.api.errors import Codes, SynapseError
+
 _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@"
 
+# https://matrix.org/docs/spec/client_server/r0.6.0#post-matrix-client-r0-register-email-requesttoken
+# Note: The : character is allowed here for older clients, but will be removed in a
+# future release. Context: https://github.com/matrix-org/synapse/issues/6766
+client_secret_regex = re.compile(r"^[0-9a-zA-Z\.\=\_\-\:]+$")
+
 # random_string and random_string_with_symbols are used for a range of things,
 # some cryptographically important, some less so. We use SystemRandom to make sure
 # we get cryptographically-secure randoms.
@@ -109,3 +118,11 @@ def exception_to_unicode(e):
         return msg.decode("utf-8", errors="replace")
     else:
         return msg
+
+
+def assert_valid_client_secret(client_secret):
+    """Validate that a given string matches the client_secret regex defined by the spec"""
+    if client_secret_regex.match(client_secret) is None:
+        raise SynapseError(
+            400, "Invalid client_secret parameter", errcode=Codes.INVALID_PARAM
+        )
diff --git a/tests/util/test_stringutils.py b/tests/util/test_stringutils.py
new file mode 100644
index 0000000000..4f4da29a98
--- /dev/null
+++ b/tests/util/test_stringutils.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.api.errors import SynapseError
+from synapse.util.stringutils import assert_valid_client_secret
+
+from .. import unittest
+
+
+class StringUtilsTestCase(unittest.TestCase):
+    def test_client_secret_regex(self):
+        """Ensure that client_secret does not contain illegal characters"""
+        good = [
+            "abcde12345",
+            "ABCabc123",
+            "_--something==_",
+            "...--==-18913",
+            "8Dj2odd-e9asd.cd==_--ddas-secret-",
+            # We temporarily allow : characters: https://github.com/matrix-org/synapse/issues/6766
+            # To be removed in a future release
+            "SECRET:1234567890",
+        ]
+
+        bad = [
+            "--+-/secret",
+            "\\dx--dsa288",
+            "",
+            "AAS//",
+            "asdj**",
+            ">X><Z<!!-)))",
+            "a@b.com",
+        ]
+
+        for client_secret in good:
+            assert_valid_client_secret(client_secret)
+
+        for client_secret in bad:
+            with self.assertRaises(SynapseError):
+                assert_valid_client_secret(client_secret)
-- 
cgit 1.4.1


From 509e381afa8c656e72f5fef3d651a9819794174a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 21 Feb 2020 07:15:07 -0500
Subject: Clarify list/set/dict/tuple comprehensions and enforce via flake8
 (#6957)

Ensure good comprehension hygiene using flake8-comprehensions.
---
 CONTRIBUTING.md                                    |  2 +-
 changelog.d/6957.misc                              |  1 +
 docs/code_style.md                                 |  2 +-
 scripts-dev/convert_server_keys.py                 |  2 +-
 synapse/app/_base.py                               |  2 +-
 synapse/app/federation_sender.py                   |  4 +--
 synapse/app/pusher.py                              |  2 +-
 synapse/config/server.py                           |  4 +--
 synapse/config/tls.py                              |  2 +-
 synapse/crypto/keyring.py                          |  6 ++--
 synapse/federation/send_queue.py                   |  4 +--
 synapse/groups/groups_server.py                    |  2 +-
 synapse/handlers/device.py                         |  2 +-
 synapse/handlers/directory.py                      |  4 +--
 synapse/handlers/federation.py                     | 18 +++++-----
 synapse/handlers/presence.py                       |  6 ++--
 synapse/handlers/receipts.py                       |  2 +-
 synapse/handlers/room.py                           |  2 +-
 synapse/handlers/search.py                         |  8 ++---
 synapse/handlers/sync.py                           | 22 ++++++------
 synapse/handlers/typing.py                         |  4 +--
 synapse/logging/utils.py                           |  2 +-
 synapse/metrics/__init__.py                        |  2 +-
 synapse/metrics/background_process_metrics.py      |  4 +--
 synapse/push/bulk_push_rule_evaluator.py           |  8 ++---
 synapse/push/emailpusher.py                        |  2 +-
 synapse/push/mailer.py                             | 20 +++++------
 synapse/push/pusherpool.py                         |  2 +-
 synapse/rest/admin/_base.py                        |  4 +--
 synapse/rest/client/v1/push_rule.py                |  6 ++--
 synapse/rest/client/v1/pusher.py                   |  4 +--
 synapse/rest/client/v2_alpha/sync.py               |  2 +-
 synapse/rest/key/v2/remote_key_resource.py         |  2 +-
 synapse/rest/media/v1/_base.py                     | 40 ++++++++++------------
 synapse/state/v1.py                                | 10 +++---
 synapse/state/v2.py                                |  8 ++---
 synapse/storage/_base.py                           |  2 +-
 synapse/storage/background_updates.py              |  2 +-
 synapse/storage/data_stores/main/appservice.py     | 14 ++++----
 synapse/storage/data_stores/main/client_ips.py     |  4 +--
 synapse/storage/data_stores/main/devices.py        | 13 ++++---
 .../storage/data_stores/main/event_federation.py   |  2 +-
 synapse/storage/data_stores/main/events.py         |  8 ++---
 .../storage/data_stores/main/events_bg_updates.py  |  2 +-
 synapse/storage/data_stores/main/events_worker.py  |  6 ++--
 synapse/storage/data_stores/main/push_rule.py      |  8 ++---
 synapse/storage/data_stores/main/receipts.py       |  4 +--
 synapse/storage/data_stores/main/roommember.py     |  4 +--
 synapse/storage/data_stores/main/state.py          |  8 ++---
 synapse/storage/data_stores/main/stream.py         |  8 ++---
 .../storage/data_stores/main/user_erasure_store.py |  4 +--
 synapse/storage/data_stores/state/store.py         |  4 +--
 synapse/storage/database.py                        |  4 +--
 synapse/storage/persist_events.py                  |  8 ++---
 synapse/storage/prepare_database.py                |  6 ++--
 synapse/util/frozenutils.py                        |  2 +-
 synapse/visibility.py                              |  4 +--
 tests/config/test_generate.py                      |  2 +-
 tests/federation/test_federation_server.py         |  2 +-
 tests/handlers/test_presence.py                    |  4 +--
 tests/handlers/test_typing.py                      |  6 ++--
 tests/handlers/test_user_directory.py              | 12 +++----
 tests/push/test_email.py                           |  6 ++--
 tests/push/test_http.py                            |  8 ++---
 tests/rest/client/v2_alpha/test_sync.py            | 28 ++++++++-------
 tests/storage/test__base.py                        |  4 +--
 tests/storage/test_appservice.py                   | 36 +++++++++----------
 tests/storage/test_cleanup_extrems.py              | 10 +++---
 tests/storage/test_event_metrics.py                | 36 +++++++++----------
 tests/storage/test_state.py                        |  2 +-
 tests/test_state.py                                | 18 +++-------
 tests/util/test_stream_change_cache.py             | 18 +++-------
 tox.ini                                            |  1 +
 73 files changed, 251 insertions(+), 276 deletions(-)
 create mode 100644 changelog.d/6957.misc

(limited to 'tests/util')

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4b01b6ac8c..253a0ca648 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -60,7 +60,7 @@ python 3.6 and to install each tool:
 
 ```
 # Install the dependencies
-pip install -U black flake8 isort
+pip install -U black flake8 flake8-comprehensions isort
 
 # Run the linter script
 ./scripts-dev/lint.sh
diff --git a/changelog.d/6957.misc b/changelog.d/6957.misc
new file mode 100644
index 0000000000..4f98030110
--- /dev/null
+++ b/changelog.d/6957.misc
@@ -0,0 +1 @@
+Use flake8-comprehensions to enforce good hygiene of list/set/dict comprehensions.
diff --git a/docs/code_style.md b/docs/code_style.md
index 71aecd41f7..6ef6f80290 100644
--- a/docs/code_style.md
+++ b/docs/code_style.md
@@ -30,7 +30,7 @@ The necessary tools are detailed below.
 
     Install `flake8` with:
 
-        pip install --upgrade flake8
+        pip install --upgrade flake8 flake8-comprehensions
 
     Check all application and test code with:
 
diff --git a/scripts-dev/convert_server_keys.py b/scripts-dev/convert_server_keys.py
index 179be61c30..06b4c1e2ff 100644
--- a/scripts-dev/convert_server_keys.py
+++ b/scripts-dev/convert_server_keys.py
@@ -103,7 +103,7 @@ def main():
 
     yaml.safe_dump(result, sys.stdout, default_flow_style=False)
 
-    rows = list(row for server, json in result.items() for row in rows_v2(server, json))
+    rows = [row for server, json in result.items() for row in rows_v2(server, json)]
 
     cursor = connection.cursor()
     cursor.executemany(
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 109b1e2fb5..9ffd23c6df 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -141,7 +141,7 @@ def start_reactor(
 
 def quit_with_error(error_string):
     message_lines = error_string.split("\n")
-    line_length = max([len(l) for l in message_lines if len(l) < 80]) + 2
+    line_length = max(len(l) for l in message_lines if len(l) < 80) + 2
     sys.stderr.write("*" * line_length + "\n")
     for line in message_lines:
         sys.stderr.write(" %s\n" % (line.rstrip(),))
diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py
index 63a91f1177..b7fcf80ddc 100644
--- a/synapse/app/federation_sender.py
+++ b/synapse/app/federation_sender.py
@@ -262,7 +262,7 @@ class FederationSenderHandler(object):
 
         # ... as well as device updates and messages
         elif stream_name == DeviceListsStream.NAME:
-            hosts = set(row.destination for row in rows)
+            hosts = {row.destination for row in rows}
             for host in hosts:
                 self.federation_sender.send_device_messages(host)
 
@@ -270,7 +270,7 @@ class FederationSenderHandler(object):
             # The to_device stream includes stuff to be pushed to both local
             # clients and remote servers, so we ignore entities that start with
             # '@' (since they'll be local users rather than destinations).
-            hosts = set(row.entity for row in rows if not row.entity.startswith("@"))
+            hosts = {row.entity for row in rows if not row.entity.startswith("@")}
             for host in hosts:
                 self.federation_sender.send_device_messages(host)
 
diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py
index e46b6ac598..84e9f8d5e2 100644
--- a/synapse/app/pusher.py
+++ b/synapse/app/pusher.py
@@ -158,7 +158,7 @@ class PusherReplicationHandler(ReplicationClientHandler):
                 yield self.pusher_pool.on_new_notifications(token, token)
             elif stream_name == "receipts":
                 yield self.pusher_pool.on_new_receipts(
-                    token, token, set(row.room_id for row in rows)
+                    token, token, {row.room_id for row in rows}
                 )
         except Exception:
             logger.exception("Error poking pushers")
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 0ec1b0fadd..7525765fee 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -1066,12 +1066,12 @@ KNOWN_RESOURCES = (
 
 
 def _check_resource_config(listeners):
-    resource_names = set(
+    resource_names = {
         res_name
         for listener in listeners
         for res in listener.get("resources", [])
         for res_name in res.get("names", [])
-    )
+    }
 
     for resource in resource_names:
         if resource not in KNOWN_RESOURCES:
diff --git a/synapse/config/tls.py b/synapse/config/tls.py
index 97a12d51f6..a65538562b 100644
--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -260,7 +260,7 @@ class TlsConfig(Config):
                 crypto.FILETYPE_ASN1, self.tls_certificate
             )
             sha256_fingerprint = encode_base64(sha256(x509_certificate_bytes).digest())
-            sha256_fingerprints = set(f["sha256"] for f in self.tls_fingerprints)
+            sha256_fingerprints = {f["sha256"] for f in self.tls_fingerprints}
             if sha256_fingerprint not in sha256_fingerprints:
                 self.tls_fingerprints.append({"sha256": sha256_fingerprint})
 
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 6fe5a6a26a..983f0ead8c 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -326,9 +326,7 @@ class Keyring(object):
             verify_requests (list[VerifyJsonRequest]): list of verify requests
         """
 
-        remaining_requests = set(
-            (rq for rq in verify_requests if not rq.key_ready.called)
-        )
+        remaining_requests = {rq for rq in verify_requests if not rq.key_ready.called}
 
         @defer.inlineCallbacks
         def do_iterations():
@@ -396,7 +394,7 @@ class Keyring(object):
 
         results = yield fetcher.get_keys(missing_keys)
 
-        completed = list()
+        completed = []
         for verify_request in remaining_requests:
             server_name = verify_request.server_name
 
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 001bb304ae..876fb0e245 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -129,9 +129,9 @@ class FederationRemoteSendQueue(object):
             for key in keys[:i]:
                 del self.presence_changed[key]
 
-            user_ids = set(
+            user_ids = {
                 user_id for uids in self.presence_changed.values() for user_id in uids
-            )
+            }
 
             keys = self.presence_destinations.keys()
             i = self.presence_destinations.bisect_left(position_to_delete)
diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py
index c106abae21..4f0dc0a209 100644
--- a/synapse/groups/groups_server.py
+++ b/synapse/groups/groups_server.py
@@ -608,7 +608,7 @@ class GroupsServerHandler(GroupsServerWorkerHandler):
         user_results = yield self.store.get_users_in_group(
             group_id, include_private=True
         )
-        if user_id in [user_result["user_id"] for user_result in user_results]:
+        if user_id in (user_result["user_id"] for user_result in user_results):
             raise SynapseError(400, "User already in group")
 
         content = {
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 50cea3f378..a514c30714 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -742,6 +742,6 @@ class DeviceListUpdater(object):
 
         # We clobber the seen updates since we've re-synced from a given
         # point.
-        self._seen_updates[user_id] = set([stream_id])
+        self._seen_updates[user_id] = {stream_id}
 
         defer.returnValue(result)
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 921d887b24..0b23ca919a 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -72,7 +72,7 @@ class DirectoryHandler(BaseHandler):
         # TODO(erikj): Check if there is a current association.
         if not servers:
             users = yield self.state.get_current_users_in_room(room_id)
-            servers = set(get_domain_from_id(u) for u in users)
+            servers = {get_domain_from_id(u) for u in users}
 
         if not servers:
             raise SynapseError(400, "Failed to get server list")
@@ -255,7 +255,7 @@ class DirectoryHandler(BaseHandler):
             )
 
         users = yield self.state.get_current_users_in_room(room_id)
-        extra_servers = set(get_domain_from_id(u) for u in users)
+        extra_servers = {get_domain_from_id(u) for u in users}
         servers = set(extra_servers) | set(servers)
 
         # If this server is in the list of servers, return it first.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index eb20ef4aec..a689065f89 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -659,11 +659,11 @@ class FederationHandler(BaseHandler):
         # this can happen if a remote server claims that the state or
         # auth_events at an event in room A are actually events in room B
 
-        bad_events = list(
+        bad_events = [
             (event_id, event.room_id)
             for event_id, event in fetched_events.items()
             if event.room_id != room_id
-        )
+        ]
 
         for bad_event_id, bad_room_id in bad_events:
             # This is a bogus situation, but since we may only discover it a long time
@@ -856,7 +856,7 @@ class FederationHandler(BaseHandler):
 
         # Don't bother processing events we already have.
         seen_events = await self.store.have_events_in_timeline(
-            set(e.event_id for e in events)
+            {e.event_id for e in events}
         )
 
         events = [e for e in events if e.event_id not in seen_events]
@@ -866,7 +866,7 @@ class FederationHandler(BaseHandler):
 
         event_map = {e.event_id: e for e in events}
 
-        event_ids = set(e.event_id for e in events)
+        event_ids = {e.event_id for e in events}
 
         # build a list of events whose prev_events weren't in the batch.
         # (XXX: this will include events whose prev_events we already have; that doesn't
@@ -892,13 +892,13 @@ class FederationHandler(BaseHandler):
             state_events.update({s.event_id: s for s in state})
             events_to_state[e_id] = state
 
-        required_auth = set(
+        required_auth = {
             a_id
             for event in events
             + list(state_events.values())
             + list(auth_events.values())
             for a_id in event.auth_event_ids()
-        )
+        }
         auth_events.update(
             {e_id: event_map[e_id] for e_id in required_auth if e_id in event_map}
         )
@@ -1247,7 +1247,7 @@ class FederationHandler(BaseHandler):
     async def on_event_auth(self, event_id: str) -> List[EventBase]:
         event = await self.store.get_event(event_id)
         auth = await self.store.get_auth_chain(
-            [auth_id for auth_id in event.auth_event_ids()], include_given=True
+            list(event.auth_event_ids()), include_given=True
         )
         return list(auth)
 
@@ -2152,7 +2152,7 @@ class FederationHandler(BaseHandler):
 
         # Now get the current auth_chain for the event.
         local_auth_chain = await self.store.get_auth_chain(
-            [auth_id for auth_id in event.auth_event_ids()], include_given=True
+            list(event.auth_event_ids()), include_given=True
         )
 
         # TODO: Check if we would now reject event_id. If so we need to tell
@@ -2654,7 +2654,7 @@ class FederationHandler(BaseHandler):
             member_handler = self.hs.get_room_member_handler()
             yield member_handler.send_membership_event(None, event, context)
         else:
-            destinations = set(x.split(":", 1)[-1] for x in (sender_user_id, room_id))
+            destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
             yield self.federation_client.forward_third_party_invite(
                 destinations, room_id, event_dict
             )
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 202aa9294f..0d6cf2b008 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -313,7 +313,7 @@ class PresenceHandler(object):
                 notified_presence_counter.inc(len(to_notify))
                 yield self._persist_and_notify(list(to_notify.values()))
 
-            self.unpersisted_users_changes |= set(s.user_id for s in new_states)
+            self.unpersisted_users_changes |= {s.user_id for s in new_states}
             self.unpersisted_users_changes -= set(to_notify.keys())
 
             to_federation_ping = {
@@ -698,7 +698,7 @@ class PresenceHandler(object):
         updates = yield self.current_state_for_users(target_user_ids)
         updates = list(updates.values())
 
-        for user_id in set(target_user_ids) - set(u.user_id for u in updates):
+        for user_id in set(target_user_ids) - {u.user_id for u in updates}:
             updates.append(UserPresenceState.default(user_id))
 
         now = self.clock.time_msec()
@@ -886,7 +886,7 @@ class PresenceHandler(object):
             hosts = yield self.state.get_current_hosts_in_room(room_id)
 
             # Filter out ourselves.
-            hosts = set(host for host in hosts if host != self.server_name)
+            hosts = {host for host in hosts if host != self.server_name}
 
             self.federation.send_presence_to_destinations(
                 states=[state], destinations=hosts
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 9283c039e3..8bc100db42 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -94,7 +94,7 @@ class ReceiptsHandler(BaseHandler):
             # no new receipts
             return False
 
-        affected_room_ids = list(set([r.room_id for r in receipts]))
+        affected_room_ids = list({r.room_id for r in receipts})
 
         self.notifier.on_new_event("receipt_key", max_batch_id, rooms=affected_room_ids)
         # Note that the min here shouldn't be relied upon to be accurate.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 76e8f61b74..8ee870f0bb 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -355,7 +355,7 @@ class RoomCreationHandler(BaseHandler):
             # If so, mark the new room as non-federatable as well
             creation_content["m.federate"] = False
 
-        initial_state = dict()
+        initial_state = {}
 
         # Replicate relevant room events
         types_to_copy = (
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 110097eab9..ec1542d416 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -184,7 +184,7 @@ class SearchHandler(BaseHandler):
             membership_list=[Membership.JOIN],
             # membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban],
         )
-        room_ids = set(r.room_id for r in rooms)
+        room_ids = {r.room_id for r in rooms}
 
         # If doing a subset of all rooms seearch, check if any of the rooms
         # are from an upgraded room, and search their contents as well
@@ -374,12 +374,12 @@ class SearchHandler(BaseHandler):
                 ).to_string()
 
                 if include_profile:
-                    senders = set(
+                    senders = {
                         ev.sender
                         for ev in itertools.chain(
                             res["events_before"], [event], res["events_after"]
                         )
-                    )
+                    }
 
                     if res["events_after"]:
                         last_event_id = res["events_after"][-1].event_id
@@ -421,7 +421,7 @@ class SearchHandler(BaseHandler):
 
         state_results = {}
         if include_state:
-            rooms = set(e.room_id for e in allowed_events)
+            rooms = {e.room_id for e in allowed_events}
             for room_id in rooms:
                 state = yield self.state_handler.get_current_state(room_id)
                 state_results[room_id] = list(state.values())
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4324bc702e..669dbc8a48 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -682,11 +682,9 @@ class SyncHandler(object):
 
         # FIXME: order by stream ordering rather than as returned by SQL
         if joined_user_ids or invited_user_ids:
-            summary["m.heroes"] = sorted(
-                [user_id for user_id in (joined_user_ids + invited_user_ids)]
-            )[0:5]
+            summary["m.heroes"] = sorted(joined_user_ids + invited_user_ids)[0:5]
         else:
-            summary["m.heroes"] = sorted([user_id for user_id in gone_user_ids])[0:5]
+            summary["m.heroes"] = sorted(gone_user_ids)[0:5]
 
         if not sync_config.filter_collection.lazy_load_members():
             return summary
@@ -697,9 +695,9 @@ class SyncHandler(object):
 
         # track which members the client should already know about via LL:
         # Ones which are already in state...
-        existing_members = set(
+        existing_members = {
             user_id for (typ, user_id) in state.keys() if typ == EventTypes.Member
-        )
+        }
 
         # ...or ones which are in the timeline...
         for ev in batch.events:
@@ -773,10 +771,10 @@ class SyncHandler(object):
                 # We only request state for the members needed to display the
                 # timeline:
 
-                members_to_fetch = set(
+                members_to_fetch = {
                     event.sender  # FIXME: we also care about invite targets etc.
                     for event in batch.events
-                )
+                }
 
                 if full_state:
                     # always make sure we LL ourselves so we know we're in the room
@@ -1993,10 +1991,10 @@ def _calculate_state(
         )
     }
 
-    c_ids = set(e for e in itervalues(current))
-    ts_ids = set(e for e in itervalues(timeline_start))
-    p_ids = set(e for e in itervalues(previous))
-    tc_ids = set(e for e in itervalues(timeline_contains))
+    c_ids = set(itervalues(current))
+    ts_ids = set(itervalues(timeline_start))
+    p_ids = set(itervalues(previous))
+    tc_ids = set(itervalues(timeline_contains))
 
     # If we are lazyloading room members, we explicitly add the membership events
     # for the senders in the timeline into the state block returned by /sync,
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 5406618431..391bceb0c4 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -198,7 +198,7 @@ class TypingHandler(object):
                 now=now, obj=member, then=now + FEDERATION_PING_INTERVAL
             )
 
-            for domain in set(get_domain_from_id(u) for u in users):
+            for domain in {get_domain_from_id(u) for u in users}:
                 if domain != self.server_name:
                     logger.debug("sending typing update to %s", domain)
                     self.federation.build_and_send_edu(
@@ -231,7 +231,7 @@ class TypingHandler(object):
             return
 
         users = yield self.state.get_current_users_in_room(room_id)
-        domains = set(get_domain_from_id(u) for u in users)
+        domains = {get_domain_from_id(u) for u in users}
 
         if self.server_name in domains:
             logger.info("Got typing update from %s: %r", user_id, content)
diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py
index 6073fc2725..0c2527bd86 100644
--- a/synapse/logging/utils.py
+++ b/synapse/logging/utils.py
@@ -148,7 +148,7 @@ def trace_function(f):
             pathname=pathname,
             lineno=lineno,
             msg=msg,
-            args=tuple(),
+            args=(),
             exc_info=None,
         )
 
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 0b45e1f52a..0dba997a23 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -240,7 +240,7 @@ class BucketCollector(object):
         res.append(["+Inf", sum(data.values())])
 
         metric = HistogramMetricFamily(
-            self.name, "", buckets=res, sum_value=sum([x * y for x, y in data.items()])
+            self.name, "", buckets=res, sum_value=sum(x * y for x, y in data.items())
         )
         yield metric
 
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index c53d2a0d40..b65bcd8806 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -80,13 +80,13 @@ _background_process_db_sched_duration = Counter(
 # map from description to a counter, so that we can name our logcontexts
 # incrementally. (It actually duplicates _background_process_start_count, but
 # it's much simpler to do so than to try to combine them.)
-_background_process_counts = dict()  # type: dict[str, int]
+_background_process_counts = {}  # type: dict[str, int]
 
 # map from description to the currently running background processes.
 #
 # it's kept as a dict of sets rather than a big set so that we can keep track
 # of process descriptions that no longer have any active processes.
-_background_processes = dict()  # type: dict[str, set[_BackgroundProcess]]
+_background_processes = {}  # type: dict[str, set[_BackgroundProcess]]
 
 # A lock that covers the above dicts
 _bg_metrics_lock = threading.Lock()
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 7d9f5a38d9..433ca2f416 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -400,11 +400,11 @@ class RulesForRoom(object):
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug("Found members %r: %r", self.room_id, members.values())
 
-        interested_in_user_ids = set(
+        interested_in_user_ids = {
             user_id
             for user_id, membership in itervalues(members)
             if membership == Membership.JOIN
-        )
+        }
 
         logger.debug("Joined: %r", interested_in_user_ids)
 
@@ -412,9 +412,9 @@ class RulesForRoom(object):
             interested_in_user_ids, on_invalidate=self.invalidate_all_cb
         )
 
-        user_ids = set(
+        user_ids = {
             uid for uid, have_pusher in iteritems(if_users_with_pushers) if have_pusher
-        )
+        }
 
         logger.debug("With pushers: %r", user_ids)
 
diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py
index 8c818a86bf..ba4551d619 100644
--- a/synapse/push/emailpusher.py
+++ b/synapse/push/emailpusher.py
@@ -204,7 +204,7 @@ class EmailPusher(object):
                 yield self.send_notification(unprocessed, reason)
 
                 yield self.save_last_stream_ordering_and_success(
-                    max([ea["stream_ordering"] for ea in unprocessed])
+                    max(ea["stream_ordering"] for ea in unprocessed)
                 )
 
                 # we update the throttle on all the possible unprocessed push actions
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index b13b646bfd..4ccaf178ce 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -526,12 +526,10 @@ class Mailer(object):
                     # If the room doesn't have a name, say who the messages
                     # are from explicitly to avoid, "messages in the Bob room"
                     sender_ids = list(
-                        set(
-                            [
-                                notif_events[n["event_id"]].sender
-                                for n in notifs_by_room[room_id]
-                            ]
-                        )
+                        {
+                            notif_events[n["event_id"]].sender
+                            for n in notifs_by_room[room_id]
+                        }
                     )
 
                     member_events = yield self.store.get_events(
@@ -558,12 +556,10 @@ class Mailer(object):
                 # If the reason room doesn't have a name, say who the messages
                 # are from explicitly to avoid, "messages in the Bob room"
                 sender_ids = list(
-                    set(
-                        [
-                            notif_events[n["event_id"]].sender
-                            for n in notifs_by_room[reason["room_id"]]
-                        ]
-                    )
+                    {
+                        notif_events[n["event_id"]].sender
+                        for n in notifs_by_room[reason["room_id"]]
+                    }
                 )
 
                 member_events = yield self.store.get_events(
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index b9dca5bc63..01789a9fb4 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -191,7 +191,7 @@ class PusherPool:
                 min_stream_id - 1, max_stream_id
             )
             # This returns a tuple, user_id is at index 3
-            users_affected = set([r[3] for r in updated_receipts])
+            users_affected = {r[3] for r in updated_receipts}
 
             for u in users_affected:
                 if u in self.pushers:
diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py
index 459482eb6d..a96f75ce26 100644
--- a/synapse/rest/admin/_base.py
+++ b/synapse/rest/admin/_base.py
@@ -29,7 +29,7 @@ def historical_admin_path_patterns(path_regex):
     Note that this should only be used for existing endpoints: new ones should just
     register for the /_synapse/admin path.
     """
-    return list(
+    return [
         re.compile(prefix + path_regex)
         for prefix in (
             "^/_synapse/admin/v1",
@@ -37,7 +37,7 @@ def historical_admin_path_patterns(path_regex):
             "^/_matrix/client/unstable/admin",
             "^/_matrix/client/r0/admin",
         )
-    )
+    ]
 
 
 def admin_patterns(path_regex: str):
diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py
index 4f74600239..9fd4908136 100644
--- a/synapse/rest/client/v1/push_rule.py
+++ b/synapse/rest/client/v1/push_rule.py
@@ -49,7 +49,7 @@ class PushRuleRestServlet(RestServlet):
         if self._is_worker:
             raise Exception("Cannot handle PUT /push_rules on worker")
 
-        spec = _rule_spec_from_path([x for x in path.split("/")])
+        spec = _rule_spec_from_path(path.split("/"))
         try:
             priority_class = _priority_class_from_spec(spec)
         except InvalidRuleException as e:
@@ -110,7 +110,7 @@ class PushRuleRestServlet(RestServlet):
         if self._is_worker:
             raise Exception("Cannot handle DELETE /push_rules on worker")
 
-        spec = _rule_spec_from_path([x for x in path.split("/")])
+        spec = _rule_spec_from_path(path.split("/"))
 
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
@@ -138,7 +138,7 @@ class PushRuleRestServlet(RestServlet):
 
         rules = format_push_rules_for_user(requester.user, rules)
 
-        path = [x for x in path.split("/")][1:]
+        path = path.split("/")[1:]
 
         if path == []:
             # we're a reference impl: pedantry is our job.
diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py
index 6f6b7aed6e..550a2f1b44 100644
--- a/synapse/rest/client/v1/pusher.py
+++ b/synapse/rest/client/v1/pusher.py
@@ -54,9 +54,9 @@ class PushersRestServlet(RestServlet):
 
         pushers = await self.hs.get_datastore().get_pushers_by_user_id(user.to_string())
 
-        filtered_pushers = list(
+        filtered_pushers = [
             {k: v for k, v in p.items() if k in ALLOWED_KEYS} for p in pushers
-        )
+        ]
 
         return 200, {"pushers": filtered_pushers}
 
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index d8292ce29f..8fa68dd37f 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -72,7 +72,7 @@ class SyncRestServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/sync$")
-    ALLOWED_PRESENCE = set(["online", "offline", "unavailable"])
+    ALLOWED_PRESENCE = {"online", "offline", "unavailable"}
 
     def __init__(self, hs):
         super(SyncRestServlet, self).__init__()
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 9d6813a047..4b6d030a57 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -149,7 +149,7 @@ class RemoteKey(DirectServeResource):
 
         time_now_ms = self.clock.time_msec()
 
-        cache_misses = dict()  # type: Dict[str, Set[str]]
+        cache_misses = {}  # type: Dict[str, Set[str]]
         for (server_name, key_id, from_server), results in cached.items():
             results = [(result["ts_added_ms"], result) for result in results]
 
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 65bbf00073..ba28dd089d 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -135,27 +135,25 @@ def add_file_headers(request, media_type, file_size, upload_name):
 
 # separators as defined in RFC2616. SP and HT are handled separately.
 # see _can_encode_filename_as_token.
-_FILENAME_SEPARATOR_CHARS = set(
-    (
-        "(",
-        ")",
-        "<",
-        ">",
-        "@",
-        ",",
-        ";",
-        ":",
-        "\\",
-        '"',
-        "/",
-        "[",
-        "]",
-        "?",
-        "=",
-        "{",
-        "}",
-    )
-)
+_FILENAME_SEPARATOR_CHARS = {
+    "(",
+    ")",
+    "<",
+    ">",
+    "@",
+    ",",
+    ";",
+    ":",
+    "\\",
+    '"',
+    "/",
+    "[",
+    "]",
+    "?",
+    "=",
+    "{",
+    "}",
+}
 
 
 def _can_encode_filename_as_token(x):
diff --git a/synapse/state/v1.py b/synapse/state/v1.py
index 24b7c0faef..9bf98d06f2 100644
--- a/synapse/state/v1.py
+++ b/synapse/state/v1.py
@@ -69,9 +69,9 @@ def resolve_events_with_store(
 
     unconflicted_state, conflicted_state = _seperate(state_sets)
 
-    needed_events = set(
+    needed_events = {
         event_id for event_ids in itervalues(conflicted_state) for event_id in event_ids
-    )
+    }
     needed_event_count = len(needed_events)
     if event_map is not None:
         needed_events -= set(iterkeys(event_map))
@@ -261,11 +261,11 @@ def _resolve_state_events(conflicted_state, auth_events):
 
 
 def _resolve_auth_events(events, auth_events):
-    reverse = [i for i in reversed(_ordered_events(events))]
+    reverse = list(reversed(_ordered_events(events)))
 
-    auth_keys = set(
+    auth_keys = {
         key for event in events for key in event_auth.auth_types_for_event(event)
-    )
+    }
 
     new_auth_events = {}
     for key in auth_keys:
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 75fe58305a..0ffe6d8c14 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -105,7 +105,7 @@ def resolve_events_with_store(
                 % (room_id, event.event_id, event.room_id,)
             )
 
-    full_conflicted_set = set(eid for eid in full_conflicted_set if eid in event_map)
+    full_conflicted_set = {eid for eid in full_conflicted_set if eid in event_map}
 
     logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
 
@@ -233,7 +233,7 @@ def _get_auth_chain_difference(state_sets, event_map, state_res_store):
 
     auth_sets = []
     for state_set in state_sets:
-        auth_ids = set(
+        auth_ids = {
             eid
             for key, eid in iteritems(state_set)
             if (
@@ -246,7 +246,7 @@ def _get_auth_chain_difference(state_sets, event_map, state_res_store):
                 )
             )
             and eid not in common
-        )
+        }
 
         auth_chain = yield state_res_store.get_auth_chain(auth_ids, common)
         auth_ids.update(auth_chain)
@@ -275,7 +275,7 @@ def _seperate(state_sets):
     conflicted_state = {}
 
     for key in set(itertools.chain.from_iterable(state_sets)):
-        event_ids = set(state_set.get(key) for state_set in state_sets)
+        event_ids = {state_set.get(key) for state_set in state_sets}
         if len(event_ids) == 1:
             unconflicted_state[key] = event_ids.pop()
         else:
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index da3b99f93d..13de5f1f62 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -56,7 +56,7 @@ class SQLBaseStore(metaclass=ABCMeta):
             members_changed (iterable[str]): The user_ids of members that have
                 changed
         """
-        for host in set(get_domain_from_id(u) for u in members_changed):
+        for host in {get_domain_from_id(u) for u in members_changed}:
             self._attempt_to_invalidate_cache("is_host_joined", (room_id, host))
             self._attempt_to_invalidate_cache("was_host_joined", (room_id, host))
 
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index bd547f35cf..eb1a7e5002 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -189,7 +189,7 @@ class BackgroundUpdater(object):
                 keyvalues=None,
                 retcols=("update_name", "depends_on"),
             )
-            in_flight = set(update["update_name"] for update in updates)
+            in_flight = {update["update_name"] for update in updates}
             for update in updates:
                 if update["depends_on"] not in in_flight:
                     self._background_update_queue.append(update["update_name"])
diff --git a/synapse/storage/data_stores/main/appservice.py b/synapse/storage/data_stores/main/appservice.py
index b2f39649fd..efbc06c796 100644
--- a/synapse/storage/data_stores/main/appservice.py
+++ b/synapse/storage/data_stores/main/appservice.py
@@ -135,7 +135,7 @@ class ApplicationServiceTransactionWorkerStore(
             may be empty.
         """
         results = yield self.db.simple_select_list(
-            "application_services_state", dict(state=state), ["as_id"]
+            "application_services_state", {"state": state}, ["as_id"]
         )
         # NB: This assumes this class is linked with ApplicationServiceStore
         as_list = self.get_app_services()
@@ -158,7 +158,7 @@ class ApplicationServiceTransactionWorkerStore(
         """
         result = yield self.db.simple_select_one(
             "application_services_state",
-            dict(as_id=service.id),
+            {"as_id": service.id},
             ["state"],
             allow_none=True,
             desc="get_appservice_state",
@@ -177,7 +177,7 @@ class ApplicationServiceTransactionWorkerStore(
             A Deferred which resolves when the state was set successfully.
         """
         return self.db.simple_upsert(
-            "application_services_state", dict(as_id=service.id), dict(state=state)
+            "application_services_state", {"as_id": service.id}, {"state": state}
         )
 
     def create_appservice_txn(self, service, events):
@@ -253,13 +253,15 @@ class ApplicationServiceTransactionWorkerStore(
             self.db.simple_upsert_txn(
                 txn,
                 "application_services_state",
-                dict(as_id=service.id),
-                dict(last_txn=txn_id),
+                {"as_id": service.id},
+                {"last_txn": txn_id},
             )
 
             # Delete txn
             self.db.simple_delete_txn(
-                txn, "application_services_txns", dict(txn_id=txn_id, as_id=service.id)
+                txn,
+                "application_services_txns",
+                {"txn_id": txn_id, "as_id": service.id},
             )
 
         return self.db.runInteraction(
diff --git a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py
index 13f4c9c72e..e1ccb27142 100644
--- a/synapse/storage/data_stores/main/client_ips.py
+++ b/synapse/storage/data_stores/main/client_ips.py
@@ -530,7 +530,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore):
             ((row["access_token"], row["ip"]), (row["user_agent"], row["last_seen"]))
             for row in rows
         )
-        return list(
+        return [
             {
                 "access_token": access_token,
                 "ip": ip,
@@ -538,7 +538,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore):
                 "last_seen": last_seen,
             }
             for (access_token, ip), (user_agent, last_seen) in iteritems(results)
-        )
+        ]
 
     @wrap_as_background_process("prune_old_user_ips")
     async def _prune_old_user_ips(self):
diff --git a/synapse/storage/data_stores/main/devices.py b/synapse/storage/data_stores/main/devices.py
index b7617efb80..d55733a4cd 100644
--- a/synapse/storage/data_stores/main/devices.py
+++ b/synapse/storage/data_stores/main/devices.py
@@ -137,7 +137,7 @@ class DeviceWorkerStore(SQLBaseStore):
 
         # get the cross-signing keys of the users in the list, so that we can
         # determine which of the device changes were cross-signing keys
-        users = set(r[0] for r in updates)
+        users = {r[0] for r in updates}
         master_key_by_user = {}
         self_signing_key_by_user = {}
         for user in users:
@@ -446,7 +446,7 @@ class DeviceWorkerStore(SQLBaseStore):
             a set of user_ids and results_map is a mapping of
             user_id -> device_id -> device_info
         """
-        user_ids = set(user_id for user_id, _ in query_list)
+        user_ids = {user_id for user_id, _ in query_list}
         user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids))
 
         # We go and check if any of the users need to have their device lists
@@ -454,10 +454,9 @@ class DeviceWorkerStore(SQLBaseStore):
         users_needing_resync = yield self.get_user_ids_requiring_device_list_resync(
             user_ids
         )
-        user_ids_in_cache = (
-            set(user_id for user_id, stream_id in user_map.items() if stream_id)
-            - users_needing_resync
-        )
+        user_ids_in_cache = {
+            user_id for user_id, stream_id in user_map.items() if stream_id
+        } - users_needing_resync
         user_ids_not_in_cache = user_ids - user_ids_in_cache
 
         results = {}
@@ -604,7 +603,7 @@ class DeviceWorkerStore(SQLBaseStore):
             rows = yield self.db.execute(
                 "get_users_whose_signatures_changed", None, sql, user_id, from_key
             )
-            return set(user for row in rows for user in json.loads(row[0]))
+            return {user for row in rows for user in json.loads(row[0])}
         else:
             return set()
 
diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 750ec1b70d..49a7b8b433 100644
--- a/synapse/storage/data_stores/main/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -426,7 +426,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
                     query, (room_id, event_id, False, limit - len(event_results))
                 )
 
-                new_results = set(t[0] for t in txn) - seen_events
+                new_results = {t[0] for t in txn} - seen_events
 
                 new_front |= new_results
                 seen_events |= new_results
diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py
index c9d0d68c3a..8ae23df00a 100644
--- a/synapse/storage/data_stores/main/events.py
+++ b/synapse/storage/data_stores/main/events.py
@@ -145,7 +145,7 @@ class EventsStore(
             return txn.fetchall()
 
         res = yield self.db.runInteraction("read_forward_extremities", fetch)
-        self._current_forward_extremities_amount = c_counter(list(x[0] for x in res))
+        self._current_forward_extremities_amount = c_counter([x[0] for x in res])
 
     @_retry_on_integrity_error
     @defer.inlineCallbacks
@@ -598,11 +598,11 @@ class EventsStore(
             # We find out which membership events we may have deleted
             # and which we have added, then we invlidate the caches for all
             # those users.
-            members_changed = set(
+            members_changed = {
                 state_key
                 for ev_type, state_key in itertools.chain(to_delete, to_insert)
                 if ev_type == EventTypes.Member
-            )
+            }
 
             for member in members_changed:
                 txn.call_after(
@@ -1615,7 +1615,7 @@ class EventsStore(
         """
         )
 
-        referenced_state_groups = set(sg for sg, in txn)
+        referenced_state_groups = {sg for sg, in txn}
         logger.info(
             "[purge] found %i referenced state groups", len(referenced_state_groups)
         )
diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py
index 5177b71016..f54c8b1ee0 100644
--- a/synapse/storage/data_stores/main/events_bg_updates.py
+++ b/synapse/storage/data_stores/main/events_bg_updates.py
@@ -402,7 +402,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
                     keyvalues={},
                     retcols=("room_id",),
                 )
-                room_ids = set(row["room_id"] for row in rows)
+                room_ids = {row["room_id"] for row in rows}
                 for room_id in room_ids:
                     txn.call_after(
                         self.get_latest_event_ids_in_room.invalidate, (room_id,)
diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py
index 7251e819f5..47a3a26072 100644
--- a/synapse/storage/data_stores/main/events_worker.py
+++ b/synapse/storage/data_stores/main/events_worker.py
@@ -494,9 +494,9 @@ class EventsWorkerStore(SQLBaseStore):
         """
         with Measure(self._clock, "_fetch_event_list"):
             try:
-                events_to_fetch = set(
+                events_to_fetch = {
                     event_id for events, _ in event_list for event_id in events
-                )
+                }
 
                 row_dict = self.db.new_transaction(
                     conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch
@@ -804,7 +804,7 @@ class EventsWorkerStore(SQLBaseStore):
             desc="have_events_in_timeline",
         )
 
-        return set(r["event_id"] for r in rows)
+        return {r["event_id"] for r in rows}
 
     @defer.inlineCallbacks
     def have_seen_events(self, event_ids):
diff --git a/synapse/storage/data_stores/main/push_rule.py b/synapse/storage/data_stores/main/push_rule.py
index e2673ae073..62ac88d9f2 100644
--- a/synapse/storage/data_stores/main/push_rule.py
+++ b/synapse/storage/data_stores/main/push_rule.py
@@ -276,21 +276,21 @@ class PushRulesWorkerStore(
         # We ignore app service users for now. This is so that we don't fill
         # up the `get_if_users_have_pushers` cache with AS entries that we
         # know don't have pushers, nor even read receipts.
-        local_users_in_room = set(
+        local_users_in_room = {
             u
             for u in users_in_room
             if self.hs.is_mine_id(u)
             and not self.get_if_app_services_interested_in_user(u)
-        )
+        }
 
         # users in the room who have pushers need to get push rules run because
         # that's how their pushers work
         if_users_with_pushers = yield self.get_if_users_have_pushers(
             local_users_in_room, on_invalidate=cache_context.invalidate
         )
-        user_ids = set(
+        user_ids = {
             uid for uid, have_pusher in if_users_with_pushers.items() if have_pusher
-        )
+        }
 
         users_with_receipts = yield self.get_users_with_read_receipts_in_room(
             room_id, on_invalidate=cache_context.invalidate
diff --git a/synapse/storage/data_stores/main/receipts.py b/synapse/storage/data_stores/main/receipts.py
index 96e54d145e..0d932a0672 100644
--- a/synapse/storage/data_stores/main/receipts.py
+++ b/synapse/storage/data_stores/main/receipts.py
@@ -58,7 +58,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     @cachedInlineCallbacks()
     def get_users_with_read_receipts_in_room(self, room_id):
         receipts = yield self.get_receipts_for_room(room_id, "m.read")
-        return set(r["user_id"] for r in receipts)
+        return {r["user_id"] for r in receipts}
 
     @cached(num_args=2)
     def get_receipts_for_room(self, room_id, receipt_type):
@@ -283,7 +283,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 args.append(limit)
             txn.execute(sql, args)
 
-            return list(r[0:5] + (json.loads(r[5]),) for r in txn)
+            return [r[0:5] + (json.loads(r[5]),) for r in txn]
 
         return self.db.runInteraction(
             "get_all_updated_receipts", get_all_updated_receipts_txn
diff --git a/synapse/storage/data_stores/main/roommember.py b/synapse/storage/data_stores/main/roommember.py
index d5ced05701..d5bd0cb5cf 100644
--- a/synapse/storage/data_stores/main/roommember.py
+++ b/synapse/storage/data_stores/main/roommember.py
@@ -465,7 +465,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
             txn.execute(sql % (clause,), args)
 
-            return set(row[0] for row in txn)
+            return {row[0] for row in txn}
 
         return await self.db.runInteraction(
             "get_users_server_still_shares_room_with",
@@ -826,7 +826,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                 GROUP BY room_id, user_id;
             """
             txn.execute(sql, (user_id,))
-            return set(row[0] for row in txn if row[1] == 0)
+            return {row[0] for row in txn if row[1] == 0}
 
         return self.db.runInteraction(
             "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn
diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py
index 3d34103e67..3a3b9a8e72 100644
--- a/synapse/storage/data_stores/main/state.py
+++ b/synapse/storage/data_stores/main/state.py
@@ -321,7 +321,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             desc="get_referenced_state_groups",
         )
 
-        return set(row["state_group"] for row in rows)
+        return {row["state_group"] for row in rows}
 
 
 class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
@@ -367,7 +367,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
             """
 
             txn.execute(sql, (last_room_id, batch_size))
-            room_ids = list(row[0] for row in txn)
+            room_ids = [row[0] for row in txn]
             if not room_ids:
                 return True, set()
 
@@ -384,7 +384,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
 
             txn.execute(sql, (last_room_id, room_ids[-1], "%:" + self.server_name))
 
-            joined_room_ids = set(row[0] for row in txn)
+            joined_room_ids = {row[0] for row in txn}
 
             left_rooms = set(room_ids) - joined_room_ids
 
@@ -404,7 +404,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
                 retcols=("state_key",),
             )
 
-            potentially_left_users = set(row["state_key"] for row in rows)
+            potentially_left_users = {row["state_key"] for row in rows}
 
             # Now lets actually delete the rooms from the DB.
             self.db.simple_delete_many_txn(
diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py
index 056b25b13a..ada5cce6c2 100644
--- a/synapse/storage/data_stores/main/stream.py
+++ b/synapse/storage/data_stores/main/stream.py
@@ -346,11 +346,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             from_key (str): The room_key portion of a StreamToken
         """
         from_key = RoomStreamToken.parse_stream_token(from_key).stream
-        return set(
+        return {
             room_id
             for room_id in room_ids
             if self._events_stream_cache.has_entity_changed(room_id, from_key)
-        )
+        }
 
     @defer.inlineCallbacks
     def get_room_events_stream_for_room(
@@ -679,11 +679,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
         events_before = yield self.get_events_as_list(
-            [e for e in results["before"]["event_ids"]], get_prev_content=True
+            list(results["before"]["event_ids"]), get_prev_content=True
         )
 
         events_after = yield self.get_events_as_list(
-            [e for e in results["after"]["event_ids"]], get_prev_content=True
+            list(results["after"]["event_ids"]), get_prev_content=True
         )
 
         return {
diff --git a/synapse/storage/data_stores/main/user_erasure_store.py b/synapse/storage/data_stores/main/user_erasure_store.py
index af8025bc17..ec6b8a4ffd 100644
--- a/synapse/storage/data_stores/main/user_erasure_store.py
+++ b/synapse/storage/data_stores/main/user_erasure_store.py
@@ -63,9 +63,9 @@ class UserErasureWorkerStore(SQLBaseStore):
             retcols=("user_id",),
             desc="are_users_erased",
         )
-        erased_users = set(row["user_id"] for row in rows)
+        erased_users = {row["user_id"] for row in rows}
 
-        res = dict((u, u in erased_users) for u in user_ids)
+        res = {u: u in erased_users for u in user_ids}
         return res
 
 
diff --git a/synapse/storage/data_stores/state/store.py b/synapse/storage/data_stores/state/store.py
index c4ee9b7ccb..57a5267663 100644
--- a/synapse/storage/data_stores/state/store.py
+++ b/synapse/storage/data_stores/state/store.py
@@ -520,11 +520,11 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             retcols=("state_group",),
         )
 
-        remaining_state_groups = set(
+        remaining_state_groups = {
             row["state_group"]
             for row in rows
             if row["state_group"] not in state_groups_to_delete
-        )
+        }
 
         logger.info(
             "[purge] de-delta-ing %i remaining state groups",
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 6dcb5c04da..1953614401 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -554,8 +554,8 @@ class Database(object):
         Returns:
             A list of dicts where the key is the column header.
         """
-        col_headers = list(intern(str(column[0])) for column in cursor.description)
-        results = list(dict(zip(col_headers, row)) for row in cursor)
+        col_headers = [intern(str(column[0])) for column in cursor.description]
+        results = [dict(zip(col_headers, row)) for row in cursor]
         return results
 
     def execute(self, desc, decoder, query, *args):
diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py
index b950550f23..0f9ac1cf09 100644
--- a/synapse/storage/persist_events.py
+++ b/synapse/storage/persist_events.py
@@ -602,14 +602,14 @@ class EventsPersistenceStorage(object):
             event_id_to_state_group.update(event_to_groups)
 
         # State groups of old_latest_event_ids
-        old_state_groups = set(
+        old_state_groups = {
             event_id_to_state_group[evid] for evid in old_latest_event_ids
-        )
+        }
 
         # State groups of new_latest_event_ids
-        new_state_groups = set(
+        new_state_groups = {
             event_id_to_state_group[evid] for evid in new_latest_event_ids
-        )
+        }
 
         # If they old and new groups are the same then we don't need to do
         # anything.
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index c285ef52a0..fc69c32a0a 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -345,9 +345,9 @@ def _upgrade_existing_database(
                     "Could not open delta dir for version %d: %s" % (v, directory)
                 )
 
-        duplicates = set(
+        duplicates = {
             file_name for file_name, count in file_name_counter.items() if count > 1
-        )
+        }
         if duplicates:
             # We don't support using the same file name in the same delta version.
             raise PrepareDatabaseException(
@@ -454,7 +454,7 @@ def _apply_module_schema_files(cur, database_engine, modname, names_and_streams)
         ),
         (modname,),
     )
-    applied_deltas = set(d for d, in cur)
+    applied_deltas = {d for d, in cur}
     for (name, stream) in names_and_streams:
         if name in applied_deltas:
             continue
diff --git a/synapse/util/frozenutils.py b/synapse/util/frozenutils.py
index 635b897d6c..f2ccd5e7c6 100644
--- a/synapse/util/frozenutils.py
+++ b/synapse/util/frozenutils.py
@@ -30,7 +30,7 @@ def freeze(o):
         return o
 
     try:
-        return tuple([freeze(i) for i in o])
+        return tuple(freeze(i) for i in o)
     except TypeError:
         pass
 
diff --git a/synapse/visibility.py b/synapse/visibility.py
index d0abd8f04f..e60d9756b7 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -75,7 +75,7 @@ def filter_events_for_client(
     """
     # Filter out events that have been soft failed so that we don't relay them
     # to clients.
-    events = list(e for e in events if not e.internal_metadata.is_soft_failed())
+    events = [e for e in events if not e.internal_metadata.is_soft_failed()]
 
     types = ((EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id))
     event_id_to_state = yield storage.state.get_state_for_events(
@@ -97,7 +97,7 @@ def filter_events_for_client(
     erased_senders = yield storage.main.are_users_erased((e.sender for e in events))
 
     if apply_retention_policies:
-        room_ids = set(e.room_id for e in events)
+        room_ids = {e.room_id for e in events}
         retention_policies = {}
 
         for room_id in room_ids:
diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py
index 2684e662de..463855ecc8 100644
--- a/tests/config/test_generate.py
+++ b/tests/config/test_generate.py
@@ -48,7 +48,7 @@ class ConfigGenerationTestCase(unittest.TestCase):
             )
 
         self.assertSetEqual(
-            set(["homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"]),
+            {"homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"},
             set(os.listdir(self.dir)),
         )
 
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index e7d8699040..296dc887be 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -83,7 +83,7 @@ class StateQueryTests(unittest.FederatingHomeserverTestCase):
             )
         )
 
-        self.assertEqual(members, set(["@user:other.example.com", u1]))
+        self.assertEqual(members, {"@user:other.example.com", u1})
         self.assertEqual(len(channel.json_body["pdus"]), 6)
 
     def test_needs_to_be_in_room(self):
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index c171038df8..64915bafcd 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -338,7 +338,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         )
 
         new_state = handle_timeout(
-            state, is_mine=True, syncing_user_ids=set([user_id]), now=now
+            state, is_mine=True, syncing_user_ids={user_id}, now=now
         )
 
         self.assertIsNotNone(new_state)
@@ -579,7 +579,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(expected_state.state, PresenceState.ONLINE)
         self.federation_sender.send_presence_to_destinations.assert_called_once_with(
-            destinations=set(("server2", "server3")), states=[expected_state]
+            destinations={"server2", "server3"}, states=[expected_state]
         )
 
     def _add_new_user(self, room_id, user_id):
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 140cc0a3c2..07b204666e 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -129,12 +129,12 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         hs.get_auth().check_user_in_room = check_user_in_room
 
         def get_joined_hosts_for_room(room_id):
-            return set(member.domain for member in self.room_members)
+            return {member.domain for member in self.room_members}
 
         self.datastore.get_joined_hosts_for_room = get_joined_hosts_for_room
 
         def get_current_users_in_room(room_id):
-            return set(str(u) for u in self.room_members)
+            return {str(u) for u in self.room_members}
 
         hs.get_state_handler().get_current_users_in_room = get_current_users_in_room
 
@@ -257,7 +257,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
 
         member = RoomMember(ROOM_ID, U_APPLE.to_string())
         self.handler._member_typing_until[member] = 1002000
-        self.handler._room_typing[ROOM_ID] = set([U_APPLE.to_string()])
+        self.handler._room_typing[ROOM_ID] = {U_APPLE.to_string()}
 
         self.assertEquals(self.event_source.get_current_key(), 0)
 
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 0a4765fff4..7b92bdbc47 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -114,7 +114,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         public_users = self.get_users_in_public_rooms()
 
         self.assertEqual(
-            self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)])
+            self._compress_shared(shares_private), {(u1, u2, room), (u2, u1, room)}
         )
         self.assertEqual(public_users, [])
 
@@ -169,7 +169,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         public_users = self.get_users_in_public_rooms()
 
         self.assertEqual(
-            self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)])
+            self._compress_shared(shares_private), {(u1, u2, room), (u2, u1, room)}
         )
         self.assertEqual(public_users, [])
 
@@ -226,7 +226,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         public_users = self.get_users_in_public_rooms()
 
         self.assertEqual(
-            self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)])
+            self._compress_shared(shares_private), {(u1, u2, room), (u2, u1, room)}
         )
         self.assertEqual(public_users, [])
 
@@ -358,12 +358,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         public_users = self.get_users_in_public_rooms()
 
         # User 1 and User 2 are in the same public room
-        self.assertEqual(set(public_users), set([(u1, room), (u2, room)]))
+        self.assertEqual(set(public_users), {(u1, room), (u2, room)})
 
         # User 1 and User 3 share private rooms
         self.assertEqual(
             self._compress_shared(shares_private),
-            set([(u1, u3, private_room), (u3, u1, private_room)]),
+            {(u1, u3, private_room), (u3, u1, private_room)},
         )
 
     def test_initial_share_all_users(self):
@@ -398,7 +398,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
 
         # No users share rooms
         self.assertEqual(public_users, [])
-        self.assertEqual(self._compress_shared(shares_private), set([]))
+        self.assertEqual(self._compress_shared(shares_private), set())
 
         # Despite not sharing a room, search_all_users means we get a search
         # result.
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 80187406bc..83032cc9ea 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -163,7 +163,7 @@ class EmailPusherTests(HomeserverTestCase):
 
         # Get the stream ordering before it gets sent
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=self.user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": self.user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
@@ -174,7 +174,7 @@ class EmailPusherTests(HomeserverTestCase):
 
         # It hasn't succeeded yet, so the stream ordering shouldn't have moved
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=self.user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": self.user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
@@ -192,7 +192,7 @@ class EmailPusherTests(HomeserverTestCase):
 
         # The stream ordering has increased
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=self.user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": self.user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index fe3441f081..baf9c785f4 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -102,7 +102,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         # Get the stream ordering before it gets sent
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
@@ -113,7 +113,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         # It hasn't succeeded yet, so the stream ordering shouldn't have moved
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
@@ -132,7 +132,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         # The stream ordering has increased
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
@@ -152,7 +152,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         # The stream ordering has increased, again
         pushers = self.get_success(
-            self.hs.get_datastore().get_pushers_by(dict(user_name=user_id))
+            self.hs.get_datastore().get_pushers_by({"user_name": user_id})
         )
         pushers = list(pushers)
         self.assertEqual(len(pushers), 1)
diff --git a/tests/rest/client/v2_alpha/test_sync.py b/tests/rest/client/v2_alpha/test_sync.py
index 9c13a13786..fa3a3ec1bd 100644
--- a/tests/rest/client/v2_alpha/test_sync.py
+++ b/tests/rest/client/v2_alpha/test_sync.py
@@ -40,16 +40,14 @@ class FilterTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(channel.code, 200)
         self.assertTrue(
-            set(
-                [
-                    "next_batch",
-                    "rooms",
-                    "presence",
-                    "account_data",
-                    "to_device",
-                    "device_lists",
-                ]
-            ).issubset(set(channel.json_body.keys()))
+            {
+                "next_batch",
+                "rooms",
+                "presence",
+                "account_data",
+                "to_device",
+                "device_lists",
+            }.issubset(set(channel.json_body.keys()))
         )
 
     def test_sync_presence_disabled(self):
@@ -63,9 +61,13 @@ class FilterTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(channel.code, 200)
         self.assertTrue(
-            set(
-                ["next_batch", "rooms", "account_data", "to_device", "device_lists"]
-            ).issubset(set(channel.json_body.keys()))
+            {
+                "next_batch",
+                "rooms",
+                "account_data",
+                "to_device",
+                "device_lists",
+            }.issubset(set(channel.json_body.keys()))
         )
 
 
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index d491ea2924..e37260a820 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -373,7 +373,7 @@ class UpsertManyTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             set(self._dump_to_tuple(res)),
-            set([(1, "user1", "hello"), (2, "user2", "there")]),
+            {(1, "user1", "hello"), (2, "user2", "there")},
         )
 
         # Update only user2
@@ -400,5 +400,5 @@ class UpsertManyTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             set(self._dump_to_tuple(res)),
-            set([(1, "user1", "hello"), (2, "user2", "bleb")]),
+            {(1, "user1", "hello"), (2, "user2", "bleb")},
         )
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index fd52512696..31710949a8 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -69,14 +69,14 @@ class ApplicationServiceStoreTestCase(unittest.TestCase):
                 pass
 
     def _add_appservice(self, as_token, id, url, hs_token, sender):
-        as_yaml = dict(
-            url=url,
-            as_token=as_token,
-            hs_token=hs_token,
-            id=id,
-            sender_localpart=sender,
-            namespaces={},
-        )
+        as_yaml = {
+            "url": url,
+            "as_token": as_token,
+            "hs_token": hs_token,
+            "id": id,
+            "sender_localpart": sender,
+            "namespaces": {},
+        }
         # use the token as the filename
         with open(as_token, "w") as outfile:
             outfile.write(yaml.dump(as_yaml))
@@ -135,14 +135,14 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
         )
 
     def _add_service(self, url, as_token, id):
-        as_yaml = dict(
-            url=url,
-            as_token=as_token,
-            hs_token="something",
-            id=id,
-            sender_localpart="a_sender",
-            namespaces={},
-        )
+        as_yaml = {
+            "url": url,
+            "as_token": as_token,
+            "hs_token": "something",
+            "id": id,
+            "sender_localpart": "a_sender",
+            "namespaces": {},
+        }
         # use the token as the filename
         with open(as_token, "w") as outfile:
             outfile.write(yaml.dump(as_yaml))
@@ -384,8 +384,8 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
         )
         self.assertEquals(2, len(services))
         self.assertEquals(
-            set([self.as_list[2]["id"], self.as_list[0]["id"]]),
-            set([services[0].id, services[1].id]),
+            {self.as_list[2]["id"], self.as_list[0]["id"]},
+            {services[0].id, services[1].id},
         )
 
 
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index 029ac26454..0e04b2cf92 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -134,7 +134,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b)))
+        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -172,7 +172,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b)))
+        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -227,9 +227,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(
-            set(latest_event_ids), set((event_id_a, event_id_b, event_id_c))
-        )
+        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b, event_id_c})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -237,7 +235,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), set([event_id_b, event_id_c]))
+        self.assertEqual(set(latest_event_ids), {event_id_b, event_id_c})
 
 
 class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py
index f26ff57a18..a7b7fd36d3 100644
--- a/tests/storage/test_event_metrics.py
+++ b/tests/storage/test_event_metrics.py
@@ -59,24 +59,22 @@ class ExtremStatisticsTestCase(HomeserverTestCase):
             )
         )
 
-        expected = set(
-            [
-                b'synapse_forward_extremities_bucket{le="1.0"} 0.0',
-                b'synapse_forward_extremities_bucket{le="2.0"} 2.0',
-                b'synapse_forward_extremities_bucket{le="3.0"} 2.0',
-                b'synapse_forward_extremities_bucket{le="5.0"} 2.0',
-                b'synapse_forward_extremities_bucket{le="7.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="10.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="15.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="20.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="50.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="100.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="200.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="500.0"} 3.0',
-                b'synapse_forward_extremities_bucket{le="+Inf"} 3.0',
-                b"synapse_forward_extremities_count 3.0",
-                b"synapse_forward_extremities_sum 10.0",
-            ]
-        )
+        expected = {
+            b'synapse_forward_extremities_bucket{le="1.0"} 0.0',
+            b'synapse_forward_extremities_bucket{le="2.0"} 2.0',
+            b'synapse_forward_extremities_bucket{le="3.0"} 2.0',
+            b'synapse_forward_extremities_bucket{le="5.0"} 2.0',
+            b'synapse_forward_extremities_bucket{le="7.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="10.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="15.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="20.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="50.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="100.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="200.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="500.0"} 3.0',
+            b'synapse_forward_extremities_bucket{le="+Inf"} 3.0',
+            b"synapse_forward_extremities_count 3.0",
+            b"synapse_forward_extremities_sum 10.0",
+        }
 
         self.assertEqual(items, expected)
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index 04d58fbf24..0b88308ff4 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -394,7 +394,7 @@ class StateStoreTestCase(tests.unittest.TestCase):
         ) = self.state_datastore._state_group_cache.get(group)
 
         self.assertEqual(is_all, False)
-        self.assertEqual(known_absent, set([(e1.type, e1.state_key)]))
+        self.assertEqual(known_absent, {(e1.type, e1.state_key)})
         self.assertDictEqual(state_dict_ids, {(e1.type, e1.state_key): e1.event_id})
 
         ############################################
diff --git a/tests/test_state.py b/tests/test_state.py
index d1578fe581..66f22f6813 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -254,9 +254,7 @@ class StateTestCase(unittest.TestCase):
         ctx_d = context_store["D"]
 
         prev_state_ids = yield ctx_d.get_prev_state_ids()
-        self.assertSetEqual(
-            {"START", "A", "C"}, {e_id for e_id in prev_state_ids.values()}
-        )
+        self.assertSetEqual({"START", "A", "C"}, set(prev_state_ids.values()))
 
         self.assertEqual(ctx_c.state_group, ctx_d.state_group_before_event)
         self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group)
@@ -313,9 +311,7 @@ class StateTestCase(unittest.TestCase):
         ctx_e = context_store["E"]
 
         prev_state_ids = yield ctx_e.get_prev_state_ids()
-        self.assertSetEqual(
-            {"START", "A", "B", "C"}, {e for e in prev_state_ids.values()}
-        )
+        self.assertSetEqual({"START", "A", "B", "C"}, set(prev_state_ids.values()))
         self.assertEqual(ctx_c.state_group, ctx_e.state_group_before_event)
         self.assertEqual(ctx_e.state_group_before_event, ctx_e.state_group)
 
@@ -388,9 +384,7 @@ class StateTestCase(unittest.TestCase):
         ctx_d = context_store["D"]
 
         prev_state_ids = yield ctx_d.get_prev_state_ids()
-        self.assertSetEqual(
-            {"A1", "A2", "A3", "A5", "B"}, {e for e in prev_state_ids.values()}
-        )
+        self.assertSetEqual({"A1", "A2", "A3", "A5", "B"}, set(prev_state_ids.values()))
 
         self.assertEqual(ctx_b.state_group, ctx_d.state_group_before_event)
         self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group)
@@ -482,7 +476,7 @@ class StateTestCase(unittest.TestCase):
         current_state_ids = yield context.get_current_state_ids()
 
         self.assertEqual(
-            set([e.event_id for e in old_state]), set(current_state_ids.values())
+            {e.event_id for e in old_state}, set(current_state_ids.values())
         )
 
         self.assertEqual(group_name, context.state_group)
@@ -513,9 +507,7 @@ class StateTestCase(unittest.TestCase):
 
         prev_state_ids = yield context.get_prev_state_ids()
 
-        self.assertEqual(
-            set([e.event_id for e in old_state]), set(prev_state_ids.values())
-        )
+        self.assertEqual({e.event_id for e in old_state}, set(prev_state_ids.values()))
 
         self.assertIsNotNone(context.state_group)
 
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index f2be63706b..72a9de5370 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -67,7 +67,7 @@ class StreamChangeCacheTests(unittest.TestCase):
         # If we update an existing entity, it keeps the two existing entities
         cache.entity_has_changed("bar@baz.net", 5)
         self.assertEqual(
-            set(["bar@baz.net", "user@elsewhere.org"]), set(cache._entity_to_key)
+            {"bar@baz.net", "user@elsewhere.org"}, set(cache._entity_to_key)
         )
 
     def test_get_all_entities_changed(self):
@@ -137,7 +137,7 @@ class StreamChangeCacheTests(unittest.TestCase):
             cache.get_entities_changed(
                 ["user@foo.com", "bar@baz.net", "user@elsewhere.org"], stream_pos=2
             ),
-            set(["bar@baz.net", "user@elsewhere.org"]),
+            {"bar@baz.net", "user@elsewhere.org"},
         )
 
         # Query all the entries mid-way through the stream, but include one
@@ -153,7 +153,7 @@ class StreamChangeCacheTests(unittest.TestCase):
                 ],
                 stream_pos=2,
             ),
-            set(["bar@baz.net", "user@elsewhere.org"]),
+            {"bar@baz.net", "user@elsewhere.org"},
         )
 
         # Query all the entries, but before the first known point. We will get
@@ -168,21 +168,13 @@ class StreamChangeCacheTests(unittest.TestCase):
                 ],
                 stream_pos=0,
             ),
-            set(
-                [
-                    "user@foo.com",
-                    "bar@baz.net",
-                    "user@elsewhere.org",
-                    "not@here.website",
-                ]
-            ),
+            {"user@foo.com", "bar@baz.net", "user@elsewhere.org", "not@here.website"},
         )
 
         # Query a subset of the entries mid-way through the stream. We should
         # only get back the subset.
         self.assertEqual(
-            cache.get_entities_changed(["bar@baz.net"], stream_pos=2),
-            set(["bar@baz.net"]),
+            cache.get_entities_changed(["bar@baz.net"], stream_pos=2), {"bar@baz.net"},
         )
 
     def test_max_pos(self):
diff --git a/tox.ini b/tox.ini
index b9132a3177..b715ea0bff 100644
--- a/tox.ini
+++ b/tox.ini
@@ -123,6 +123,7 @@ skip_install = True
 basepython = python3.6
 deps =
     flake8
+    flake8-comprehensions
     black==19.10b0  # We pin so that our tests don't start failing on new releases of black.
 commands =
     python -m black --check --diff .
-- 
cgit 1.4.1


From 39230d217104f3cd7aba9065dc478f935ce1e614 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 24 Mar 2020 14:45:33 +0000
Subject: Clean up some LoggingContext stuff (#7120)

* Pull Sentinel out of LoggingContext

... and drop a few unnecessary references to it

* Factor out LoggingContext.current_context

move `current_context` and `set_context` out to top-level functions.

Mostly this means that I can more easily trace what's actually referring to
LoggingContext, but I think it's generally neater.

* move copy-to-parent into `stop`

this really just makes `start` and `stop` more symetric. It also means that it
behaves correctly if you manually `set_log_context` rather than using the
context manager.

* Replace `LoggingContext.alive` with `finished`

Turn `alive` into `finished` and make it a bit better defined.
---
 changelog.d/7120.misc                              |   1 +
 docs/log_contexts.md                               |   5 +-
 synapse/crypto/keyring.py                          |   4 +-
 synapse/federation/federation_base.py              |   4 +-
 synapse/handlers/sync.py                           |   4 +-
 synapse/http/request_metrics.py                    |   6 +-
 synapse/logging/_structured.py                     |   4 +-
 synapse/logging/context.py                         | 234 +++++++++++----------
 synapse/logging/scopecontextmanager.py             |  13 +-
 synapse/storage/data_stores/main/events_worker.py  |   4 +-
 synapse/storage/database.py                        |  11 +-
 synapse/util/metrics.py                            |   4 +-
 synapse/util/patch_inline_callbacks.py             |  36 ++--
 tests/crypto/test_keyring.py                       |   7 +-
 .../federation/test_matrix_federation_agent.py     |   6 +-
 tests/http/federation/test_srv_resolver.py         |   6 +-
 tests/http/test_fedclient.py                       |   6 +-
 tests/rest/client/test_transactions.py             |  16 +-
 tests/unittest.py                                  |  12 +-
 tests/util/caches/test_descriptors.py              |  22 +-
 tests/util/test_async_utils.py                     |  15 +-
 tests/util/test_linearizer.py                      |   6 +-
 tests/util/test_logcontext.py                      |  22 +-
 tests/utils.py                                     |   6 +-
 24 files changed, 232 insertions(+), 222 deletions(-)
 create mode 100644 changelog.d/7120.misc

(limited to 'tests/util')

diff --git a/changelog.d/7120.misc b/changelog.d/7120.misc
new file mode 100644
index 0000000000..731f4dcb52
--- /dev/null
+++ b/changelog.d/7120.misc
@@ -0,0 +1 @@
+Clean up some LoggingContext code.
diff --git a/docs/log_contexts.md b/docs/log_contexts.md
index 5331e8c88b..fe30ca2791 100644
--- a/docs/log_contexts.md
+++ b/docs/log_contexts.md
@@ -29,14 +29,13 @@ from synapse.logging import context         # omitted from future snippets
 def handle_request(request_id):
     request_context = context.LoggingContext()
 
-    calling_context = context.LoggingContext.current_context()
-    context.LoggingContext.set_current_context(request_context)
+    calling_context = context.set_current_context(request_context)
     try:
         request_context.request = request_id
         do_request_handling()
         logger.debug("finished")
     finally:
-        context.LoggingContext.set_current_context(calling_context)
+        context.set_current_context(calling_context)
 
 def do_request_handling():
     logger.debug("phew")  # this will be logged against request_id
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 983f0ead8c..a9f4025bfe 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -43,8 +43,8 @@ from synapse.api.errors import (
     SynapseError,
 )
 from synapse.logging.context import (
-    LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
     preserve_fn,
     run_in_background,
@@ -236,7 +236,7 @@ class Keyring(object):
         """
 
         try:
-            ctx = LoggingContext.current_context()
+            ctx = current_context()
 
             # map from server name to a set of outstanding request ids
             server_to_request_ids = {}
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index b0b0eba41e..4b115aac04 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -32,8 +32,8 @@ from synapse.events import EventBase, make_event_from_dict
 from synapse.events.utils import prune_event
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
-    LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
 )
 from synapse.types import JsonDict, get_domain_from_id
@@ -78,7 +78,7 @@ class FederationBase(object):
         """
         deferreds = _check_sigs_on_pdus(self.keyring, room_version, pdus)
 
-        ctx = LoggingContext.current_context()
+        ctx = current_context()
 
         def callback(_, pdu: EventBase):
             with PreserveLoggingContext(ctx):
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 669dbc8a48..5746fdea14 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -26,7 +26,7 @@ from prometheus_client import Counter
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.filtering import FilterCollection
 from synapse.events import EventBase
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import current_context
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.roommember import MemberSummary
 from synapse.storage.state import StateFilter
@@ -301,7 +301,7 @@ class SyncHandler(object):
         else:
             sync_type = "incremental_sync"
 
-        context = LoggingContext.current_context()
+        context = current_context()
         if context:
             context.tag = sync_type
 
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 58f9cc61c8..b58ae3d9db 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -19,7 +19,7 @@ import threading
 
 from prometheus_client.core import Counter, Histogram
 
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import current_context
 from synapse.metrics import LaterGauge
 
 logger = logging.getLogger(__name__)
@@ -148,7 +148,7 @@ LaterGauge(
 class RequestMetrics(object):
     def start(self, time_sec, name, method):
         self.start = time_sec
-        self.start_context = LoggingContext.current_context()
+        self.start_context = current_context()
         self.name = name
         self.method = method
 
@@ -163,7 +163,7 @@ class RequestMetrics(object):
         with _in_flight_requests_lock:
             _in_flight_requests.discard(self)
 
-        context = LoggingContext.current_context()
+        context = current_context()
 
         tag = ""
         if context:
diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py
index ffa7b20ca8..7372450b45 100644
--- a/synapse/logging/_structured.py
+++ b/synapse/logging/_structured.py
@@ -42,7 +42,7 @@ from synapse.logging._terse_json import (
     TerseJSONToConsoleLogObserver,
     TerseJSONToTCPLogObserver,
 )
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import current_context
 
 
 def stdlib_log_level_to_twisted(level: str) -> LogLevel:
@@ -86,7 +86,7 @@ class LogContextObserver(object):
             ].startswith("Timing out client"):
                 return
 
-        context = LoggingContext.current_context()
+        context = current_context()
 
         # Copy the context information to the log event.
         if context is not None:
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 860b99a4c6..a8eafb1c7c 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -175,7 +175,54 @@ class ContextResourceUsage(object):
         return res
 
 
-LoggingContextOrSentinel = Union["LoggingContext", "LoggingContext.Sentinel"]
+LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"]
+
+
+class _Sentinel(object):
+    """Sentinel to represent the root context"""
+
+    __slots__ = ["previous_context", "finished", "request", "scope", "tag"]
+
+    def __init__(self) -> None:
+        # Minimal set for compatibility with LoggingContext
+        self.previous_context = None
+        self.finished = False
+        self.request = None
+        self.scope = None
+        self.tag = None
+
+    def __str__(self):
+        return "sentinel"
+
+    def copy_to(self, record):
+        pass
+
+    def copy_to_twisted_log_entry(self, record):
+        record["request"] = None
+        record["scope"] = None
+
+    def start(self):
+        pass
+
+    def stop(self):
+        pass
+
+    def add_database_transaction(self, duration_sec):
+        pass
+
+    def add_database_scheduled(self, sched_sec):
+        pass
+
+    def record_event_fetch(self, event_count):
+        pass
+
+    def __nonzero__(self):
+        return False
+
+    __bool__ = __nonzero__  # python3
+
+
+SENTINEL_CONTEXT = _Sentinel()
 
 
 class LoggingContext(object):
@@ -199,76 +246,33 @@ class LoggingContext(object):
         "_resource_usage",
         "usage_start",
         "main_thread",
-        "alive",
+        "finished",
         "request",
         "tag",
         "scope",
     ]
 
-    thread_local = threading.local()
-
-    class Sentinel(object):
-        """Sentinel to represent the root context"""
-
-        __slots__ = ["previous_context", "alive", "request", "scope", "tag"]
-
-        def __init__(self) -> None:
-            # Minimal set for compatibility with LoggingContext
-            self.previous_context = None
-            self.alive = None
-            self.request = None
-            self.scope = None
-            self.tag = None
-
-        def __str__(self):
-            return "sentinel"
-
-        def copy_to(self, record):
-            pass
-
-        def copy_to_twisted_log_entry(self, record):
-            record["request"] = None
-            record["scope"] = None
-
-        def start(self):
-            pass
-
-        def stop(self):
-            pass
-
-        def add_database_transaction(self, duration_sec):
-            pass
-
-        def add_database_scheduled(self, sched_sec):
-            pass
-
-        def record_event_fetch(self, event_count):
-            pass
-
-        def __nonzero__(self):
-            return False
-
-        __bool__ = __nonzero__  # python3
-
-    sentinel = Sentinel()
-
     def __init__(self, name=None, parent_context=None, request=None) -> None:
-        self.previous_context = LoggingContext.current_context()
+        self.previous_context = current_context()
         self.name = name
 
         # track the resources used by this context so far
         self._resource_usage = ContextResourceUsage()
 
-        # If alive has the thread resource usage when the logcontext last
-        # became active.
+        # The thread resource usage when the logcontext became active. None
+        # if the context is not currently active.
         self.usage_start = None
 
         self.main_thread = get_thread_id()
         self.request = None
         self.tag = ""
-        self.alive = True
         self.scope = None  # type: Optional[_LogContextScope]
 
+        # keep track of whether we have hit the __exit__ block for this context
+        # (suggesting that the the thing that created the context thinks it should
+        # be finished, and that re-activating it would suggest an error).
+        self.finished = False
+
         self.parent_context = parent_context
 
         if self.parent_context is not None:
@@ -283,44 +287,15 @@ class LoggingContext(object):
             return str(self.request)
         return "%s@%x" % (self.name, id(self))
 
-    @classmethod
-    def current_context(cls) -> LoggingContextOrSentinel:
-        """Get the current logging context from thread local storage
-
-        Returns:
-            LoggingContext: the current logging context
-        """
-        return getattr(cls.thread_local, "current_context", cls.sentinel)
-
-    @classmethod
-    def set_current_context(
-        cls, context: LoggingContextOrSentinel
-    ) -> LoggingContextOrSentinel:
-        """Set the current logging context in thread local storage
-        Args:
-            context(LoggingContext): The context to activate.
-        Returns:
-            The context that was previously active
-        """
-        current = cls.current_context()
-
-        if current is not context:
-            current.stop()
-            cls.thread_local.current_context = context
-            context.start()
-        return current
-
     def __enter__(self) -> "LoggingContext":
         """Enters this logging context into thread local storage"""
-        old_context = self.set_current_context(self)
+        old_context = set_current_context(self)
         if self.previous_context != old_context:
             logger.warning(
                 "Expected previous context %r, found %r",
                 self.previous_context,
                 old_context,
             )
-        self.alive = True
-
         return self
 
     def __exit__(self, type, value, traceback) -> None:
@@ -329,24 +304,19 @@ class LoggingContext(object):
         Returns:
             None to avoid suppressing any exceptions that were thrown.
         """
-        current = self.set_current_context(self.previous_context)
+        current = set_current_context(self.previous_context)
         if current is not self:
-            if current is self.sentinel:
+            if current is SENTINEL_CONTEXT:
                 logger.warning("Expected logging context %s was lost", self)
             else:
                 logger.warning(
                     "Expected logging context %s but found %s", self, current
                 )
-        self.alive = False
-
-        # if we have a parent, pass our CPU usage stats on
-        if self.parent_context is not None and hasattr(
-            self.parent_context, "_resource_usage"
-        ):
-            self.parent_context._resource_usage += self._resource_usage
 
-            # reset them in case we get entered again
-            self._resource_usage.reset()
+        # the fact that we are here suggests that the caller thinks that everything
+        # is done and dusted for this logcontext, and further activity will not get
+        # recorded against the correct metrics.
+        self.finished = True
 
     def copy_to(self, record) -> None:
         """Copy logging fields from this context to a log record or
@@ -371,9 +341,14 @@ class LoggingContext(object):
             logger.warning("Started logcontext %s on different thread", self)
             return
 
+        if self.finished:
+            logger.warning("Re-starting finished log context %s", self)
+
         # If we haven't already started record the thread resource usage so
         # far
-        if not self.usage_start:
+        if self.usage_start:
+            logger.warning("Re-starting already-active log context %s", self)
+        else:
             self.usage_start = get_thread_resource_usage()
 
     def stop(self) -> None:
@@ -396,6 +371,15 @@ class LoggingContext(object):
 
         self.usage_start = None
 
+        # if we have a parent, pass our CPU usage stats on
+        if self.parent_context is not None and hasattr(
+            self.parent_context, "_resource_usage"
+        ):
+            self.parent_context._resource_usage += self._resource_usage
+
+            # reset them in case we get entered again
+            self._resource_usage.reset()
+
     def get_resource_usage(self) -> ContextResourceUsage:
         """Get resources used by this logcontext so far.
 
@@ -409,7 +393,7 @@ class LoggingContext(object):
         # If we are on the correct thread and we're currently running then we
         # can include resource usage so far.
         is_main_thread = get_thread_id() == self.main_thread
-        if self.alive and self.usage_start and is_main_thread:
+        if self.usage_start and is_main_thread:
             utime_delta, stime_delta = self._get_cputime()
             res.ru_utime += utime_delta
             res.ru_stime += stime_delta
@@ -492,7 +476,7 @@ class LoggingContextFilter(logging.Filter):
         Returns:
             True to include the record in the log output.
         """
-        context = LoggingContext.current_context()
+        context = current_context()
         for key, value in self.defaults.items():
             setattr(record, key, value)
 
@@ -512,27 +496,24 @@ class PreserveLoggingContext(object):
 
     __slots__ = ["current_context", "new_context", "has_parent"]
 
-    def __init__(self, new_context: Optional[LoggingContextOrSentinel] = None) -> None:
-        if new_context is None:
-            self.new_context = LoggingContext.sentinel  # type: LoggingContextOrSentinel
-        else:
-            self.new_context = new_context
+    def __init__(
+        self, new_context: LoggingContextOrSentinel = SENTINEL_CONTEXT
+    ) -> None:
+        self.new_context = new_context
 
     def __enter__(self) -> None:
         """Captures the current logging context"""
-        self.current_context = LoggingContext.set_current_context(self.new_context)
+        self.current_context = set_current_context(self.new_context)
 
         if self.current_context:
             self.has_parent = self.current_context.previous_context is not None
-            if not self.current_context.alive:
-                logger.debug("Entering dead context: %s", self.current_context)
 
     def __exit__(self, type, value, traceback) -> None:
         """Restores the current logging context"""
-        context = LoggingContext.set_current_context(self.current_context)
+        context = set_current_context(self.current_context)
 
         if context != self.new_context:
-            if context is LoggingContext.sentinel:
+            if not context:
                 logger.warning("Expected logging context %s was lost", self.new_context)
             else:
                 logger.warning(
@@ -541,9 +522,30 @@ class PreserveLoggingContext(object):
                     context,
                 )
 
-        if self.current_context is not LoggingContext.sentinel:
-            if not self.current_context.alive:
-                logger.debug("Restoring dead context: %s", self.current_context)
+
+_thread_local = threading.local()
+_thread_local.current_context = SENTINEL_CONTEXT
+
+
+def current_context() -> LoggingContextOrSentinel:
+    """Get the current logging context from thread local storage"""
+    return getattr(_thread_local, "current_context", SENTINEL_CONTEXT)
+
+
+def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSentinel:
+    """Set the current logging context in thread local storage
+    Args:
+        context(LoggingContext): The context to activate.
+    Returns:
+        The context that was previously active
+    """
+    current = current_context()
+
+    if current is not context:
+        current.stop()
+        _thread_local.current_context = context
+        context.start()
+    return current
 
 
 def nested_logging_context(
@@ -572,7 +574,7 @@ def nested_logging_context(
     if parent_context is not None:
         context = parent_context  # type: LoggingContextOrSentinel
     else:
-        context = LoggingContext.current_context()
+        context = current_context()
     return LoggingContext(
         parent_context=context, request=str(context.request) + "-" + suffix
     )
@@ -604,7 +606,7 @@ def run_in_background(f, *args, **kwargs):
     CRITICAL error about an unhandled error will be logged without much
     indication about where it came from.
     """
-    current = LoggingContext.current_context()
+    current = current_context()
     try:
         res = f(*args, **kwargs)
     except:  # noqa: E722
@@ -625,7 +627,7 @@ def run_in_background(f, *args, **kwargs):
 
     # The function may have reset the context before returning, so
     # we need to restore it now.
-    ctx = LoggingContext.set_current_context(current)
+    ctx = set_current_context(current)
 
     # The original context will be restored when the deferred
     # completes, but there is nothing waiting for it, so it will
@@ -674,7 +676,7 @@ def make_deferred_yieldable(deferred):
 
     # ok, we can't be sure that a yield won't block, so let's reset the
     # logcontext, and add a callback to the deferred to restore it.
-    prev_context = LoggingContext.set_current_context(LoggingContext.sentinel)
+    prev_context = set_current_context(SENTINEL_CONTEXT)
     deferred.addBoth(_set_context_cb, prev_context)
     return deferred
 
@@ -684,7 +686,7 @@ ResultT = TypeVar("ResultT")
 
 def _set_context_cb(result: ResultT, context: LoggingContext) -> ResultT:
     """A callback function which just sets the logging context"""
-    LoggingContext.set_current_context(context)
+    set_current_context(context)
     return result
 
 
@@ -752,7 +754,7 @@ def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
         Deferred: A Deferred which fires a callback with the result of `f`, or an
             errback if `f` throws an exception.
     """
-    logcontext = LoggingContext.current_context()
+    logcontext = current_context()
 
     def g():
         with LoggingContext(parent_context=logcontext):
diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py
index 4eed4f2338..dc3ab00cbb 100644
--- a/synapse/logging/scopecontextmanager.py
+++ b/synapse/logging/scopecontextmanager.py
@@ -19,7 +19,7 @@ from opentracing import Scope, ScopeManager
 
 import twisted
 
-from synapse.logging.context import LoggingContext, nested_logging_context
+from synapse.logging.context import current_context, nested_logging_context
 
 logger = logging.getLogger(__name__)
 
@@ -49,11 +49,8 @@ class LogContextScopeManager(ScopeManager):
             (Scope) : the Scope that is active, or None if not
             available.
         """
-        ctx = LoggingContext.current_context()
-        if ctx is LoggingContext.sentinel:
-            return None
-        else:
-            return ctx.scope
+        ctx = current_context()
+        return ctx.scope
 
     def activate(self, span, finish_on_close):
         """
@@ -70,9 +67,9 @@ class LogContextScopeManager(ScopeManager):
         """
 
         enter_logcontext = False
-        ctx = LoggingContext.current_context()
+        ctx = current_context()
 
-        if ctx is LoggingContext.sentinel:
+        if not ctx:
             # We don't want this scope to affect.
             logger.error("Tried to activate scope outside of loggingcontext")
             return Scope(None, span)
diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py
index ca237c6f12..3013f49d32 100644
--- a/synapse/storage/data_stores/main/events_worker.py
+++ b/synapse/storage/data_stores/main/events_worker.py
@@ -35,7 +35,7 @@ from synapse.api.room_versions import (
 )
 from synapse.events import make_event_from_dict
 from synapse.events.utils import prune_event
-from synapse.logging.context import LoggingContext, PreserveLoggingContext
+from synapse.logging.context import PreserveLoggingContext, current_context
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
 from synapse.storage.database import Database
@@ -409,7 +409,7 @@ class EventsWorkerStore(SQLBaseStore):
         missing_events_ids = [e for e in event_ids if e not in event_entry_map]
 
         if missing_events_ids:
-            log_ctx = LoggingContext.current_context()
+            log_ctx = current_context()
             log_ctx.record_event_fetch(len(missing_events_ids))
 
             # Note that _get_events_from_db is also responsible for turning db rows
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index e61595336c..715c0346dd 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -32,6 +32,7 @@ from synapse.config.database import DatabaseConnectionConfig
 from synapse.logging.context import (
     LoggingContext,
     LoggingContextOrSentinel,
+    current_context,
     make_deferred_yieldable,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -483,7 +484,7 @@ class Database(object):
             end = monotonic_time()
             duration = end - start
 
-            LoggingContext.current_context().add_database_transaction(duration)
+            current_context().add_database_transaction(duration)
 
             transaction_logger.debug("[TXN END] {%s} %f sec", name, duration)
 
@@ -510,7 +511,7 @@ class Database(object):
         after_callbacks = []  # type: List[_CallbackListEntry]
         exception_callbacks = []  # type: List[_CallbackListEntry]
 
-        if LoggingContext.current_context() == LoggingContext.sentinel:
+        if not current_context():
             logger.warning("Starting db txn '%s' from sentinel context", desc)
 
         try:
@@ -547,10 +548,8 @@ class Database(object):
         Returns:
             Deferred: The result of func
         """
-        parent_context = (
-            LoggingContext.current_context()
-        )  # type: Optional[LoggingContextOrSentinel]
-        if parent_context == LoggingContext.sentinel:
+        parent_context = current_context()  # type: Optional[LoggingContextOrSentinel]
+        if not parent_context:
             logger.warning(
                 "Starting db connection from sentinel context: metrics will be lost"
             )
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 7b18455469..ec61e14423 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -21,7 +21,7 @@ from prometheus_client import Counter
 
 from twisted.internet import defer
 
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import LoggingContext, current_context
 from synapse.metrics import InFlightGauge
 
 logger = logging.getLogger(__name__)
@@ -106,7 +106,7 @@ class Measure(object):
             raise RuntimeError("Measure() objects cannot be re-used")
 
         self.start = self.clock.time()
-        parent_context = LoggingContext.current_context()
+        parent_context = current_context()
         self._logging_context = LoggingContext(
             "Measure[%s]" % (self.name,), parent_context
         )
diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 3925927f9f..fdff195771 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -32,7 +32,7 @@ def do_patch():
     Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit
     """
 
-    from synapse.logging.context import LoggingContext
+    from synapse.logging.context import current_context
 
     global _already_patched
 
@@ -43,35 +43,35 @@ def do_patch():
     def new_inline_callbacks(f):
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
-            start_context = LoggingContext.current_context()
+            start_context = current_context()
             changes = []  # type: List[str]
             orig = orig_inline_callbacks(_check_yield_points(f, changes))
 
             try:
                 res = orig(*args, **kwargs)
             except Exception:
-                if LoggingContext.current_context() != start_context:
+                if current_context() != start_context:
                     for err in changes:
                         print(err, file=sys.stderr)
 
                     err = "%s changed context from %s to %s on exception" % (
                         f,
                         start_context,
-                        LoggingContext.current_context(),
+                        current_context(),
                     )
                     print(err, file=sys.stderr)
                     raise Exception(err)
                 raise
 
             if not isinstance(res, Deferred) or res.called:
-                if LoggingContext.current_context() != start_context:
+                if current_context() != start_context:
                     for err in changes:
                         print(err, file=sys.stderr)
 
                     err = "Completed %s changed context from %s to %s" % (
                         f,
                         start_context,
-                        LoggingContext.current_context(),
+                        current_context(),
                     )
                     # print the error to stderr because otherwise all we
                     # see in travis-ci is the 500 error
@@ -79,23 +79,23 @@ def do_patch():
                     raise Exception(err)
                 return res
 
-            if LoggingContext.current_context() != LoggingContext.sentinel:
+            if current_context():
                 err = (
                     "%s returned incomplete deferred in non-sentinel context "
                     "%s (start was %s)"
-                ) % (f, LoggingContext.current_context(), start_context)
+                ) % (f, current_context(), start_context)
                 print(err, file=sys.stderr)
                 raise Exception(err)
 
             def check_ctx(r):
-                if LoggingContext.current_context() != start_context:
+                if current_context() != start_context:
                     for err in changes:
                         print(err, file=sys.stderr)
                     err = "%s completion of %s changed context from %s to %s" % (
                         "Failure" if isinstance(r, Failure) else "Success",
                         f,
                         start_context,
-                        LoggingContext.current_context(),
+                        current_context(),
                     )
                     print(err, file=sys.stderr)
                     raise Exception(err)
@@ -127,7 +127,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
         function
     """
 
-    from synapse.logging.context import LoggingContext
+    from synapse.logging.context import current_context
 
     @functools.wraps(f)
     def check_yield_points_inner(*args, **kwargs):
@@ -136,7 +136,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
         last_yield_line_no = gen.gi_frame.f_lineno
         result = None  # type: Any
         while True:
-            expected_context = LoggingContext.current_context()
+            expected_context = current_context()
 
             try:
                 isFailure = isinstance(result, Failure)
@@ -145,7 +145,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
                 else:
                     d = gen.send(result)
             except (StopIteration, defer._DefGen_Return) as e:
-                if LoggingContext.current_context() != expected_context:
+                if current_context() != expected_context:
                     # This happens when the context is lost sometime *after* the
                     # final yield and returning. E.g. we forgot to yield on a
                     # function that returns a deferred.
@@ -159,7 +159,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
                         % (
                             f.__qualname__,
                             expected_context,
-                            LoggingContext.current_context(),
+                            current_context(),
                             f.__code__.co_filename,
                             last_yield_line_no,
                         )
@@ -173,13 +173,13 @@ def _check_yield_points(f: Callable, changes: List[str]):
                 # This happens if we yield on a deferred that doesn't follow
                 # the log context rules without wrapping in a `make_deferred_yieldable`.
                 # We raise here as this should never happen.
-                if LoggingContext.current_context() is not LoggingContext.sentinel:
+                if current_context():
                     err = (
                         "%s yielded with context %s rather than sentinel,"
                         " yielded on line %d in %s"
                         % (
                             frame.f_code.co_name,
-                            LoggingContext.current_context(),
+                            current_context(),
                             frame.f_lineno,
                             frame.f_code.co_filename,
                         )
@@ -191,7 +191,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
             except Exception as e:
                 result = Failure(e)
 
-            if LoggingContext.current_context() != expected_context:
+            if current_context() != expected_context:
 
                 # This happens because the context is lost sometime *after* the
                 # previous yield and *after* the current yield. E.g. the
@@ -206,7 +206,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
                     % (
                         frame.f_code.co_name,
                         expected_context,
-                        LoggingContext.current_context(),
+                        current_context(),
                         last_yield_line_no,
                         frame.f_lineno,
                         frame.f_code.co_filename,
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 34d5895f18..70c8e72303 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -34,6 +34,7 @@ from synapse.crypto.keyring import (
 from synapse.logging.context import (
     LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
 )
 from synapse.storage.keys import FetchKeyResult
@@ -83,9 +84,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         )
 
     def check_context(self, _, expected):
-        self.assertEquals(
-            getattr(LoggingContext.current_context(), "request", None), expected
-        )
+        self.assertEquals(getattr(current_context(), "request", None), expected)
 
     def test_verify_json_objects_for_server_awaits_previous_requests(self):
         key1 = signedjson.key.generate_signing_key(1)
@@ -105,7 +104,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
 
         @defer.inlineCallbacks
         def get_perspectives(**kwargs):
-            self.assertEquals(LoggingContext.current_context().request, "11")
+            self.assertEquals(current_context().request, "11")
             with PreserveLoggingContext():
                 yield persp_deferred
             return persp_resp
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index fdc1d918ff..562397cdda 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -38,7 +38,7 @@ from synapse.http.federation.well_known_resolver import (
     WellKnownResolver,
     _cache_period_from_headers,
 )
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
 from synapse.util.caches.ttlcache import TTLCache
 
 from tests import unittest
@@ -155,7 +155,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
             self.assertNoResult(fetch_d)
 
             # should have reset logcontext to the sentinel
-            _check_logcontext(LoggingContext.sentinel)
+            _check_logcontext(SENTINEL_CONTEXT)
 
             try:
                 fetch_res = yield fetch_d
@@ -1197,7 +1197,7 @@ class TestCachePeriodFromHeaders(unittest.TestCase):
 
 
 def _check_logcontext(context):
-    current = LoggingContext.current_context()
+    current = current_context()
     if current is not context:
         raise AssertionError("Expected logcontext %s but was %s" % (context, current))
 
diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py
index df034ab237..babc201643 100644
--- a/tests/http/federation/test_srv_resolver.py
+++ b/tests/http/federation/test_srv_resolver.py
@@ -22,7 +22,7 @@ from twisted.internet.error import ConnectError
 from twisted.names import dns, error
 
 from synapse.http.federation.srv_resolver import SrvResolver
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
 
 from tests import unittest
 from tests.utils import MockClock
@@ -54,12 +54,12 @@ class SrvResolverTestCase(unittest.TestCase):
                 self.assertNoResult(resolve_d)
 
                 # should have reset to the sentinel context
-                self.assertIs(LoggingContext.current_context(), LoggingContext.sentinel)
+                self.assertIs(current_context(), SENTINEL_CONTEXT)
 
                 result = yield resolve_d
 
                 # should have restored our context
-                self.assertIs(LoggingContext.current_context(), ctx)
+                self.assertIs(current_context(), ctx)
 
                 return result
 
diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py
index 2b01f40a42..fff4f0cbf4 100644
--- a/tests/http/test_fedclient.py
+++ b/tests/http/test_fedclient.py
@@ -29,14 +29,14 @@ from synapse.http.matrixfederationclient import (
     MatrixFederationHttpClient,
     MatrixFederationRequest,
 )
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
 
 from tests.server import FakeTransport
 from tests.unittest import HomeserverTestCase
 
 
 def check_logcontext(context):
-    current = LoggingContext.current_context()
+    current = current_context()
     if current is not context:
         raise AssertionError("Expected logcontext %s but was %s" % (context, current))
 
@@ -64,7 +64,7 @@ class FederationClientTests(HomeserverTestCase):
                 self.assertNoResult(fetch_d)
 
                 # should have reset logcontext to the sentinel
-                check_logcontext(LoggingContext.sentinel)
+                check_logcontext(SENTINEL_CONTEXT)
 
                 try:
                     fetch_res = yield fetch_d
diff --git a/tests/rest/client/test_transactions.py b/tests/rest/client/test_transactions.py
index a3d7e3c046..171632e195 100644
--- a/tests/rest/client/test_transactions.py
+++ b/tests/rest/client/test_transactions.py
@@ -2,7 +2,7 @@ from mock import Mock, call
 
 from twisted.internet import defer, reactor
 
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
 from synapse.rest.client.transactions import CLEANUP_PERIOD_MS, HttpTransactionCache
 from synapse.util import Clock
 
@@ -52,14 +52,14 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         def test():
             with LoggingContext("c") as c1:
                 res = yield self.cache.fetch_or_execute(self.mock_key, cb)
-                self.assertIs(LoggingContext.current_context(), c1)
+                self.assertIs(current_context(), c1)
                 self.assertEqual(res, "yay")
 
         # run the test twice in parallel
         d = defer.gatherResults([test(), test()])
-        self.assertIs(LoggingContext.current_context(), LoggingContext.sentinel)
+        self.assertIs(current_context(), SENTINEL_CONTEXT)
         yield d
-        self.assertIs(LoggingContext.current_context(), LoggingContext.sentinel)
+        self.assertIs(current_context(), SENTINEL_CONTEXT)
 
     @defer.inlineCallbacks
     def test_does_not_cache_exceptions(self):
@@ -81,11 +81,11 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
                 yield self.cache.fetch_or_execute(self.mock_key, cb)
             except Exception as e:
                 self.assertEqual(e.args[0], "boo")
-            self.assertIs(LoggingContext.current_context(), test_context)
+            self.assertIs(current_context(), test_context)
 
             res = yield self.cache.fetch_or_execute(self.mock_key, cb)
             self.assertEqual(res, self.mock_http_response)
-            self.assertIs(LoggingContext.current_context(), test_context)
+            self.assertIs(current_context(), test_context)
 
     @defer.inlineCallbacks
     def test_does_not_cache_failures(self):
@@ -107,11 +107,11 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
                 yield self.cache.fetch_or_execute(self.mock_key, cb)
             except Exception as e:
                 self.assertEqual(e.args[0], "boo")
-            self.assertIs(LoggingContext.current_context(), test_context)
+            self.assertIs(current_context(), test_context)
 
             res = yield self.cache.fetch_or_execute(self.mock_key, cb)
             self.assertEqual(res, self.mock_http_response)
-            self.assertIs(LoggingContext.current_context(), test_context)
+            self.assertIs(current_context(), test_context)
 
     @defer.inlineCallbacks
     def test_cleans_up(self):
diff --git a/tests/unittest.py b/tests/unittest.py
index 8816a4d152..439174dbfc 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -38,7 +38,11 @@ from synapse.config.ratelimiting import FederationRateLimitConfig
 from synapse.federation.transport import server as federation_server
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseRequest, SynapseSite
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import (
+    SENTINEL_CONTEXT,
+    current_context,
+    set_current_context,
+)
 from synapse.server import HomeServer
 from synapse.types import Requester, UserID, create_requester
 from synapse.util.ratelimitutils import FederationRateLimiter
@@ -97,10 +101,10 @@ class TestCase(unittest.TestCase):
         def setUp(orig):
             # if we're not starting in the sentinel logcontext, then to be honest
             # all future bets are off.
-            if LoggingContext.current_context() is not LoggingContext.sentinel:
+            if current_context():
                 self.fail(
                     "Test starting with non-sentinel logging context %s"
-                    % (LoggingContext.current_context(),)
+                    % (current_context(),)
                 )
 
             old_level = logging.getLogger().level
@@ -122,7 +126,7 @@ class TestCase(unittest.TestCase):
             # force a GC to workaround problems with deferreds leaking logcontexts when
             # they are GCed (see the logcontext docs)
             gc.collect()
-            LoggingContext.set_current_context(LoggingContext.sentinel)
+            set_current_context(SENTINEL_CONTEXT)
 
             return ret
 
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 39e360fe24..4d2b9e0d64 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -22,8 +22,10 @@ from twisted.internet import defer, reactor
 
 from synapse.api.errors import SynapseError
 from synapse.logging.context import (
+    SENTINEL_CONTEXT,
     LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
 )
 from synapse.util.caches import descriptors
@@ -194,7 +196,7 @@ class DescriptorTestCase(unittest.TestCase):
             with LoggingContext() as c1:
                 c1.name = "c1"
                 r = yield obj.fn(1)
-                self.assertEqual(LoggingContext.current_context(), c1)
+                self.assertEqual(current_context(), c1)
             return r
 
         def check_result(r):
@@ -204,12 +206,12 @@ class DescriptorTestCase(unittest.TestCase):
 
         # set off a deferred which will do a cache lookup
         d1 = do_lookup()
-        self.assertEqual(LoggingContext.current_context(), LoggingContext.sentinel)
+        self.assertEqual(current_context(), SENTINEL_CONTEXT)
         d1.addCallback(check_result)
 
         # and another
         d2 = do_lookup()
-        self.assertEqual(LoggingContext.current_context(), LoggingContext.sentinel)
+        self.assertEqual(current_context(), SENTINEL_CONTEXT)
         d2.addCallback(check_result)
 
         # let the lookup complete
@@ -239,14 +241,14 @@ class DescriptorTestCase(unittest.TestCase):
                 try:
                     d = obj.fn(1)
                     self.assertEqual(
-                        LoggingContext.current_context(), LoggingContext.sentinel
+                        current_context(), SENTINEL_CONTEXT,
                     )
                     yield d
                     self.fail("No exception thrown")
                 except SynapseError:
                     pass
 
-                self.assertEqual(LoggingContext.current_context(), c1)
+                self.assertEqual(current_context(), c1)
 
             # the cache should now be empty
             self.assertEqual(len(obj.fn.cache.cache), 0)
@@ -255,7 +257,7 @@ class DescriptorTestCase(unittest.TestCase):
 
         # set off a deferred which will do a cache lookup
         d1 = do_lookup()
-        self.assertEqual(LoggingContext.current_context(), LoggingContext.sentinel)
+        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 
         return d1
 
@@ -366,10 +368,10 @@ class CachedListDescriptorTestCase(unittest.TestCase):
 
             @descriptors.cachedList("fn", "args1", inlineCallbacks=True)
             def list_fn(self, args1, arg2):
-                assert LoggingContext.current_context().request == "c1"
+                assert current_context().request == "c1"
                 # we want this to behave like an asynchronous function
                 yield run_on_reactor()
-                assert LoggingContext.current_context().request == "c1"
+                assert current_context().request == "c1"
                 return self.mock(args1, arg2)
 
         with LoggingContext() as c1:
@@ -377,9 +379,9 @@ class CachedListDescriptorTestCase(unittest.TestCase):
             obj = Cls()
             obj.mock.return_value = {10: "fish", 20: "chips"}
             d1 = obj.list_fn([10, 20], 2)
-            self.assertEqual(LoggingContext.current_context(), LoggingContext.sentinel)
+            self.assertEqual(current_context(), SENTINEL_CONTEXT)
             r = yield d1
-            self.assertEqual(LoggingContext.current_context(), c1)
+            self.assertEqual(current_context(), c1)
             obj.mock.assert_called_once_with([10, 20], 2)
             self.assertEqual(r, {10: "fish", 20: "chips"})
             obj.mock.reset_mock()
diff --git a/tests/util/test_async_utils.py b/tests/util/test_async_utils.py
index f60918069a..17fd86d02d 100644
--- a/tests/util/test_async_utils.py
+++ b/tests/util/test_async_utils.py
@@ -16,7 +16,12 @@ from twisted.internet import defer
 from twisted.internet.defer import CancelledError, Deferred
 from twisted.internet.task import Clock
 
-from synapse.logging.context import LoggingContext, PreserveLoggingContext
+from synapse.logging.context import (
+    SENTINEL_CONTEXT,
+    LoggingContext,
+    PreserveLoggingContext,
+    current_context,
+)
 from synapse.util.async_helpers import timeout_deferred
 
 from tests.unittest import TestCase
@@ -79,10 +84,10 @@ class TimeoutDeferredTest(TestCase):
             # the errbacks should be run in the test logcontext
             def errback(res, deferred_name):
                 self.assertIs(
-                    LoggingContext.current_context(),
+                    current_context(),
                     context_one,
                     "errback %s run in unexpected logcontext %s"
-                    % (deferred_name, LoggingContext.current_context()),
+                    % (deferred_name, current_context()),
                 )
                 return res
 
@@ -90,7 +95,7 @@ class TimeoutDeferredTest(TestCase):
             original_deferred.addErrback(errback, "orig")
             timing_out_d = timeout_deferred(original_deferred, 1.0, self.clock)
             self.assertNoResult(timing_out_d)
-            self.assertIs(LoggingContext.current_context(), LoggingContext.sentinel)
+            self.assertIs(current_context(), SENTINEL_CONTEXT)
             timing_out_d.addErrback(errback, "timingout")
 
             self.clock.pump((1.0,))
@@ -99,4 +104,4 @@ class TimeoutDeferredTest(TestCase):
                 blocking_was_cancelled[0], "non-completing deferred was not cancelled"
             )
             self.failureResultOf(timing_out_d, defer.TimeoutError)
-            self.assertIs(LoggingContext.current_context(), context_one)
+            self.assertIs(current_context(), context_one)
diff --git a/tests/util/test_linearizer.py b/tests/util/test_linearizer.py
index 0ec8ef90ce..852ef23185 100644
--- a/tests/util/test_linearizer.py
+++ b/tests/util/test_linearizer.py
@@ -19,7 +19,7 @@ from six.moves import range
 from twisted.internet import defer, reactor
 from twisted.internet.defer import CancelledError
 
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import LoggingContext, current_context
 from synapse.util import Clock
 from synapse.util.async_helpers import Linearizer
 
@@ -54,11 +54,11 @@ class LinearizerTestCase(unittest.TestCase):
         def func(i, sleep=False):
             with LoggingContext("func(%s)" % i) as lc:
                 with (yield linearizer.queue("")):
-                    self.assertEqual(LoggingContext.current_context(), lc)
+                    self.assertEqual(current_context(), lc)
                     if sleep:
                         yield Clock(reactor).sleep(0)
 
-                self.assertEqual(LoggingContext.current_context(), lc)
+                self.assertEqual(current_context(), lc)
 
         func(0, sleep=True)
         for i in range(1, 100):
diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py
index 281b32c4b8..95301c013c 100644
--- a/tests/util/test_logcontext.py
+++ b/tests/util/test_logcontext.py
@@ -2,8 +2,10 @@ import twisted.python.failure
 from twisted.internet import defer, reactor
 
 from synapse.logging.context import (
+    SENTINEL_CONTEXT,
     LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
     nested_logging_context,
     run_in_background,
@@ -15,7 +17,7 @@ from .. import unittest
 
 class LoggingContextTestCase(unittest.TestCase):
     def _check_test_key(self, value):
-        self.assertEquals(LoggingContext.current_context().request, value)
+        self.assertEquals(current_context().request, value)
 
     def test_with_context(self):
         with LoggingContext() as context_one:
@@ -41,7 +43,7 @@ class LoggingContextTestCase(unittest.TestCase):
             self._check_test_key("one")
 
     def _test_run_in_background(self, function):
-        sentinel_context = LoggingContext.current_context()
+        sentinel_context = current_context()
 
         callback_completed = [False]
 
@@ -71,7 +73,7 @@ class LoggingContextTestCase(unittest.TestCase):
             # make sure that the context was reset before it got thrown back
             # into the reactor
             try:
-                self.assertIs(LoggingContext.current_context(), sentinel_context)
+                self.assertIs(current_context(), sentinel_context)
                 d2.callback(None)
             except BaseException:
                 d2.errback(twisted.python.failure.Failure())
@@ -108,7 +110,7 @@ class LoggingContextTestCase(unittest.TestCase):
         async def testfunc():
             self._check_test_key("one")
             d = Clock(reactor).sleep(0)
-            self.assertIs(LoggingContext.current_context(), LoggingContext.sentinel)
+            self.assertIs(current_context(), SENTINEL_CONTEXT)
             await d
             self._check_test_key("one")
 
@@ -129,14 +131,14 @@ class LoggingContextTestCase(unittest.TestCase):
             reactor.callLater(0, d.callback, None)
             return d
 
-        sentinel_context = LoggingContext.current_context()
+        sentinel_context = current_context()
 
         with LoggingContext() as context_one:
             context_one.request = "one"
 
             d1 = make_deferred_yieldable(blocking_function())
             # make sure that the context was reset by make_deferred_yieldable
-            self.assertIs(LoggingContext.current_context(), sentinel_context)
+            self.assertIs(current_context(), sentinel_context)
 
             yield d1
 
@@ -145,14 +147,14 @@ class LoggingContextTestCase(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_make_deferred_yieldable_with_chained_deferreds(self):
-        sentinel_context = LoggingContext.current_context()
+        sentinel_context = current_context()
 
         with LoggingContext() as context_one:
             context_one.request = "one"
 
             d1 = make_deferred_yieldable(_chained_deferred_function())
             # make sure that the context was reset by make_deferred_yieldable
-            self.assertIs(LoggingContext.current_context(), sentinel_context)
+            self.assertIs(current_context(), sentinel_context)
 
             yield d1
 
@@ -189,14 +191,14 @@ class LoggingContextTestCase(unittest.TestCase):
             reactor.callLater(0, d.callback, None)
             await d
 
-        sentinel_context = LoggingContext.current_context()
+        sentinel_context = current_context()
 
         with LoggingContext() as context_one:
             context_one.request = "one"
 
             d1 = make_deferred_yieldable(blocking_function())
             # make sure that the context was reset by make_deferred_yieldable
-            self.assertIs(LoggingContext.current_context(), sentinel_context)
+            self.assertIs(current_context(), sentinel_context)
 
             yield d1
 
diff --git a/tests/utils.py b/tests/utils.py
index 513f358f4f..968d109f77 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -35,7 +35,7 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.server import DEFAULT_ROOM_VERSION
 from synapse.federation.transport import server as federation_server
 from synapse.http.server import HttpServer
-from synapse.logging.context import LoggingContext
+from synapse.logging.context import current_context, set_current_context
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.engines import PostgresEngine, create_engine
@@ -493,10 +493,10 @@ class MockClock(object):
         return self.time() * 1000
 
     def call_later(self, delay, callback, *args, **kwargs):
-        current_context = LoggingContext.current_context()
+        ctx = current_context()
 
         def wrapped_callback():
-            LoggingContext.thread_local.current_context = current_context
+            set_current_context(ctx)
             callback(*args, **kwargs)
 
         t = [self.now + delay, wrapped_callback, False]
-- 
cgit 1.4.1


From 13683a3a223a3f08b295973e7b8aa6e9299a2fa5 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 22 Apr 2020 13:45:40 +0100
Subject: Extend StreamChangeCache to support multiple entities per stream ID
 (#7303)

First some background: StreamChangeCache is used to keep track of what "entities" have
changed since a given stream ID. So for example, we might use it to keep track of when the last
to-device message for a given user was received [1], and hence whether we need to pull any to-device messages from the database on a sync [2].

Now, it turns out that StreamChangeCache didn't support more than one thing being changed at
a given stream_id (this was part of the problem with #7206). However, it's entirely valid to send
to-device messages to more than one user at a time.

As it turns out, this did in fact work, because *some* methods of StreamChangeCache coped
ok with having multiple things changing on the same stream ID, and it seems we never actually
use the methods which don't work on the stream change caches where we allow multiple
changes at the same stream ID. But that feels horribly fragile, hence: let's update
StreamChangeCache to properly support this, and add some typing and some more tests while
we're at it.

[1]: https://github.com/matrix-org/synapse/blob/release-v1.12.3/synapse/storage/data_stores/main/deviceinbox.py#L301
[2]: https://github.com/matrix-org/synapse/blob/release-v1.12.3/synapse/storage/data_stores/main/deviceinbox.py#L47-L51
---
 changelog.d/7303.misc                      |   1 +
 stubs/sortedcontainers/__init__.pyi        |  13 +++
 stubs/sortedcontainers/sorteddict.pyi      | 124 +++++++++++++++++++++++++++++
 synapse/util/caches/stream_change_cache.py | 117 ++++++++++++++++-----------
 tests/util/test_stream_change_cache.py     |  69 +++++++++++++---
 tox.ini                                    |   4 +-
 6 files changed, 272 insertions(+), 56 deletions(-)
 create mode 100644 changelog.d/7303.misc
 create mode 100644 stubs/sortedcontainers/__init__.pyi
 create mode 100644 stubs/sortedcontainers/sorteddict.pyi

(limited to 'tests/util')

diff --git a/changelog.d/7303.misc b/changelog.d/7303.misc
new file mode 100644
index 0000000000..aa89c2b254
--- /dev/null
+++ b/changelog.d/7303.misc
@@ -0,0 +1 @@
+Fix StreamChangeCache to work with multiple entities changing on the same stream id.
diff --git a/stubs/sortedcontainers/__init__.pyi b/stubs/sortedcontainers/__init__.pyi
new file mode 100644
index 0000000000..073b806d3c
--- /dev/null
+++ b/stubs/sortedcontainers/__init__.pyi
@@ -0,0 +1,13 @@
+from .sorteddict import (
+    SortedDict,
+    SortedKeysView,
+    SortedItemsView,
+    SortedValuesView,
+)
+
+__all__ = [
+    "SortedDict",
+    "SortedKeysView",
+    "SortedItemsView",
+    "SortedValuesView",
+]
diff --git a/stubs/sortedcontainers/sorteddict.pyi b/stubs/sortedcontainers/sorteddict.pyi
new file mode 100644
index 0000000000..68779f968e
--- /dev/null
+++ b/stubs/sortedcontainers/sorteddict.pyi
@@ -0,0 +1,124 @@
+# stub for SortedDict. This is a lightly edited copy of
+# https://github.com/grantjenks/python-sortedcontainers/blob/eea42df1f7bad2792e8da77335ff888f04b9e5ae/sortedcontainers/sorteddict.pyi
+# (from https://github.com/grantjenks/python-sortedcontainers/pull/107)
+
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Hashable,
+    Iterator,
+    Iterable,
+    ItemsView,
+    KeysView,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Type,
+    TypeVar,
+    Tuple,
+    Union,
+    ValuesView,
+    overload,
+)
+
+_T = TypeVar("_T")
+_S = TypeVar("_S")
+_T_h = TypeVar("_T_h", bound=Hashable)
+_KT = TypeVar("_KT", bound=Hashable)  # Key type.
+_VT = TypeVar("_VT")  # Value type.
+_KT_co = TypeVar("_KT_co", covariant=True, bound=Hashable)
+_VT_co = TypeVar("_VT_co", covariant=True)
+_SD = TypeVar("_SD", bound=SortedDict)
+_Key = Callable[[_T], Any]
+
+class SortedDict(Dict[_KT, _VT]):
+    @overload
+    def __init__(self, **kwargs: _VT) -> None: ...
+    @overload
+    def __init__(self, __map: Mapping[_KT, _VT], **kwargs: _VT) -> None: ...
+    @overload
+    def __init__(
+        self, __iterable: Iterable[Tuple[_KT, _VT]], **kwargs: _VT
+    ) -> None: ...
+    @overload
+    def __init__(self, __key: _Key[_KT], **kwargs: _VT) -> None: ...
+    @overload
+    def __init__(
+        self, __key: _Key[_KT], __map: Mapping[_KT, _VT], **kwargs: _VT
+    ) -> None: ...
+    @overload
+    def __init__(
+        self, __key: _Key[_KT], __iterable: Iterable[Tuple[_KT, _VT]], **kwargs: _VT
+    ) -> None: ...
+    @property
+    def key(self) -> Optional[_Key[_KT]]: ...
+    @property
+    def iloc(self) -> SortedKeysView[_KT]: ...
+    def clear(self) -> None: ...
+    def __delitem__(self, key: _KT) -> None: ...
+    def __iter__(self) -> Iterator[_KT]: ...
+    def __reversed__(self) -> Iterator[_KT]: ...
+    def __setitem__(self, key: _KT, value: _VT) -> None: ...
+    def _setitem(self, key: _KT, value: _VT) -> None: ...
+    def copy(self: _SD) -> _SD: ...
+    def __copy__(self: _SD) -> _SD: ...
+    @classmethod
+    @overload
+    def fromkeys(cls, seq: Iterable[_T_h]) -> SortedDict[_T_h, None]: ...
+    @classmethod
+    @overload
+    def fromkeys(cls, seq: Iterable[_T_h], value: _S) -> SortedDict[_T_h, _S]: ...
+    def keys(self) -> SortedKeysView[_KT]: ...
+    def items(self) -> SortedItemsView[_KT, _VT]: ...
+    def values(self) -> SortedValuesView[_VT]: ...
+    @overload
+    def pop(self, key: _KT) -> _VT: ...
+    @overload
+    def pop(self, key: _KT, default: _T = ...) -> Union[_VT, _T]: ...
+    def popitem(self, index: int = ...) -> Tuple[_KT, _VT]: ...
+    def peekitem(self, index: int = ...) -> Tuple[_KT, _VT]: ...
+    def setdefault(self, key: _KT, default: Optional[_VT] = ...) -> _VT: ...
+    @overload
+    def update(self, __map: Mapping[_KT, _VT], **kwargs: _VT) -> None: ...
+    @overload
+    def update(self, __iterable: Iterable[Tuple[_KT, _VT]], **kwargs: _VT) -> None: ...
+    @overload
+    def update(self, **kwargs: _VT) -> None: ...
+    def __reduce__(
+        self,
+    ) -> Tuple[
+        Type[SortedDict[_KT, _VT]], Tuple[Callable[[_KT], Any], List[Tuple[_KT, _VT]]],
+    ]: ...
+    def __repr__(self) -> str: ...
+    def _check(self) -> None: ...
+    def islice(
+        self, start: Optional[int] = ..., stop: Optional[int] = ..., reverse=bool,
+    ) -> Iterator[_KT]: ...
+    def bisect_left(self, value: _KT) -> int: ...
+    def bisect_right(self, value: _KT) -> int: ...
+
+class SortedKeysView(KeysView[_KT_co], Sequence[_KT_co]):
+    @overload
+    def __getitem__(self, index: int) -> _KT_co: ...
+    @overload
+    def __getitem__(self, index: slice) -> List[_KT_co]: ...
+    def __delitem__(self, index: Union[int, slice]) -> None: ...
+
+class SortedItemsView(  # type: ignore
+    ItemsView[_KT_co, _VT_co], Sequence[Tuple[_KT_co, _VT_co]]
+):
+    def __iter__(self) -> Iterator[Tuple[_KT_co, _VT_co]]: ...
+    @overload
+    def __getitem__(self, index: int) -> Tuple[_KT_co, _VT_co]: ...
+    @overload
+    def __getitem__(self, index: slice) -> List[Tuple[_KT_co, _VT_co]]: ...
+    def __delitem__(self, index: Union[int, slice]) -> None: ...
+
+class SortedValuesView(ValuesView[_VT_co], Sequence[_VT_co]):
+    @overload
+    def __getitem__(self, index: int) -> _VT_co: ...
+    @overload
+    def __getitem__(self, index: slice) -> List[_VT_co]: ...
+    def __delitem__(self, index: Union[int, slice]) -> None: ...
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index c61d36a82e..38dc3f501e 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+from typing import Dict, Iterable, List, Mapping, Optional, Set
 
 from six import integer_types
 
@@ -23,8 +24,11 @@ from synapse.util import caches
 
 logger = logging.getLogger(__name__)
 
+# for now, assume all entities in the cache are strings
+EntityType = str
 
-class StreamChangeCache(object):
+
+class StreamChangeCache:
     """Keeps track of the stream positions of the latest change in a set of entities.
 
     Typically the entity will be a room or user id.
@@ -34,10 +38,23 @@ class StreamChangeCache(object):
     old then the cache will simply return all given entities.
     """
 
-    def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None):
+    def __init__(
+        self,
+        name: str,
+        current_stream_pos: int,
+        max_size=10000,
+        prefilled_cache: Optional[Mapping[EntityType, int]] = None,
+    ):
         self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR)
-        self._entity_to_key = {}
-        self._cache = SortedDict()
+        self._entity_to_key = {}  # type: Dict[EntityType, int]
+
+        # map from stream id to the a set of entities which changed at that stream id.
+        self._cache = SortedDict()  # type: SortedDict[int, Set[EntityType]]
+
+        # the earliest stream_pos for which we can reliably answer
+        # get_all_entities_changed. In other words, one less than the earliest
+        # stream_pos for which we know _cache is valid.
+        #
         self._earliest_known_stream_pos = current_stream_pos
         self.name = name
         self.metrics = caches.register_cache("cache", self.name, self._cache)
@@ -46,7 +63,7 @@ class StreamChangeCache(object):
             for entity, stream_pos in prefilled_cache.items():
                 self.entity_has_changed(entity, stream_pos)
 
-    def has_entity_changed(self, entity, stream_pos):
+    def has_entity_changed(self, entity: EntityType, stream_pos: int) -> bool:
         """Returns True if the entity may have been updated since stream_pos
         """
         assert type(stream_pos) in integer_types
@@ -67,22 +84,17 @@ class StreamChangeCache(object):
         self.metrics.inc_hits()
         return False
 
-    def get_entities_changed(self, entities, stream_pos):
+    def get_entities_changed(
+        self, entities: Iterable[EntityType], stream_pos: int
+    ) -> Set[EntityType]:
         """
         Returns subset of entities that have had new things since the given
         position.  Entities unknown to the cache will be returned.  If the
         position is too old it will just return the given list.
         """
-        assert type(stream_pos) is int
-
-        if stream_pos >= self._earliest_known_stream_pos:
-            changed_entities = {
-                self._cache[k]
-                for k in self._cache.islice(start=self._cache.bisect_right(stream_pos))
-            }
-
-            result = changed_entities.intersection(entities)
-
+        changed_entities = self.get_all_entities_changed(stream_pos)
+        if changed_entities is not None:
+            result = set(changed_entities).intersection(entities)
             self.metrics.inc_hits()
         else:
             result = set(entities)
@@ -90,13 +102,13 @@ class StreamChangeCache(object):
 
         return result
 
-    def has_any_entity_changed(self, stream_pos):
+    def has_any_entity_changed(self, stream_pos: int) -> bool:
         """Returns if any entity has changed
         """
         assert type(stream_pos) is int
 
         if not self._cache:
-            # If we have no cache, nothing can have changed.
+            # If the cache is empty, nothing can have changed.
             return False
 
         if stream_pos >= self._earliest_known_stream_pos:
@@ -106,45 +118,58 @@ class StreamChangeCache(object):
             self.metrics.inc_misses()
             return True
 
-    def get_all_entities_changed(self, stream_pos):
-        """Returns all entites that have had new things since the given
+    def get_all_entities_changed(self, stream_pos: int) -> Optional[List[EntityType]]:
+        """Returns all entities that have had new things since the given
         position. If the position is too old it will return None.
+
+        Returns the entities in the order that they were changed.
         """
         assert type(stream_pos) is int
 
-        if stream_pos >= self._earliest_known_stream_pos:
-            return [
-                self._cache[k]
-                for k in self._cache.islice(start=self._cache.bisect_right(stream_pos))
-            ]
-        else:
+        if stream_pos < self._earliest_known_stream_pos:
             return None
 
-    def entity_has_changed(self, entity, stream_pos):
+        changed_entities = []  # type: List[EntityType]
+
+        for k in self._cache.islice(start=self._cache.bisect_right(stream_pos)):
+            changed_entities.extend(self._cache[k])
+        return changed_entities
+
+    def entity_has_changed(self, entity: EntityType, stream_pos: int) -> None:
         """Informs the cache that the entity has been changed at the given
         position.
         """
         assert type(stream_pos) is int
 
-        # FIXME: add a sanity check here that we are not overwriting existing
-        # data in self._cache
-
-        if stream_pos > self._earliest_known_stream_pos:
-            old_pos = self._entity_to_key.get(entity, None)
-            if old_pos is not None:
-                stream_pos = max(stream_pos, old_pos)
-                self._cache.pop(old_pos, None)
-            self._cache[stream_pos] = entity
-            self._entity_to_key[entity] = stream_pos
-
-            while len(self._cache) > self._max_size:
-                k, r = self._cache.popitem(0)
-                self._earliest_known_stream_pos = max(
-                    k, self._earliest_known_stream_pos
-                )
-                self._entity_to_key.pop(r, None)
-
-    def get_max_pos_of_last_change(self, entity):
+        if stream_pos <= self._earliest_known_stream_pos:
+            return
+
+        old_pos = self._entity_to_key.get(entity, None)
+        if old_pos is not None:
+            if old_pos >= stream_pos:
+                # nothing to do
+                return
+            e = self._cache[old_pos]
+            e.remove(entity)
+            if not e:
+                # cache at this point is now empty
+                del self._cache[old_pos]
+
+        e1 = self._cache.get(stream_pos)
+        if e1 is None:
+            e1 = self._cache[stream_pos] = set()
+        e1.add(entity)
+        self._entity_to_key[entity] = stream_pos
+
+        # if the cache is too big, remove entries
+        while len(self._cache) > self._max_size:
+            k, r = self._cache.popitem(0)
+            self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos)
+            for entity in r:
+                del self._entity_to_key[entity]
+
+    def get_max_pos_of_last_change(self, entity: EntityType) -> int:
+
         """Returns an upper bound of the stream id of the last change to an
         entity.
         """
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index 72a9de5370..6857933540 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -28,18 +28,26 @@ class StreamChangeCacheTests(unittest.TestCase):
         cache.entity_has_changed("user@foo.com", 6)
         cache.entity_has_changed("bar@baz.net", 7)
 
+        # also test multiple things changing on the same stream ID
+        cache.entity_has_changed("user2@foo.com", 8)
+        cache.entity_has_changed("bar2@baz.net", 8)
+
         # If it's been changed after that stream position, return True
         self.assertTrue(cache.has_entity_changed("user@foo.com", 4))
         self.assertTrue(cache.has_entity_changed("bar@baz.net", 4))
+        self.assertTrue(cache.has_entity_changed("bar2@baz.net", 4))
+        self.assertTrue(cache.has_entity_changed("user2@foo.com", 4))
 
         # If it's been changed at that stream position, return False
         self.assertFalse(cache.has_entity_changed("user@foo.com", 6))
+        self.assertFalse(cache.has_entity_changed("user2@foo.com", 8))
 
         # If there's no changes after that stream position, return False
         self.assertFalse(cache.has_entity_changed("user@foo.com", 7))
+        self.assertFalse(cache.has_entity_changed("user2@foo.com", 9))
 
         # If the entity does not exist, return False.
-        self.assertFalse(cache.has_entity_changed("not@here.website", 7))
+        self.assertFalse(cache.has_entity_changed("not@here.website", 9))
 
         # If we request before the stream cache's earliest known position,
         # return True, whether it's a known entity or not.
@@ -47,7 +55,7 @@ class StreamChangeCacheTests(unittest.TestCase):
         self.assertTrue(cache.has_entity_changed("not@here.website", 0))
 
     @patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0)
-    def test_has_entity_changed_pops_off_start(self):
+    def test_entity_has_changed_pops_off_start(self):
         """
         StreamChangeCache.entity_has_changed will respect the max size and
         purge the oldest items upon reaching that max size.
@@ -64,11 +72,20 @@ class StreamChangeCacheTests(unittest.TestCase):
         # The oldest item has been popped off
         self.assertTrue("user@foo.com" not in cache._entity_to_key)
 
+        self.assertEqual(
+            cache.get_all_entities_changed(2), ["bar@baz.net", "user@elsewhere.org"],
+        )
+        self.assertIsNone(cache.get_all_entities_changed(1))
+
         # If we update an existing entity, it keeps the two existing entities
         cache.entity_has_changed("bar@baz.net", 5)
         self.assertEqual(
             {"bar@baz.net", "user@elsewhere.org"}, set(cache._entity_to_key)
         )
+        self.assertEqual(
+            cache.get_all_entities_changed(2), ["user@elsewhere.org", "bar@baz.net"],
+        )
+        self.assertIsNone(cache.get_all_entities_changed(1))
 
     def test_get_all_entities_changed(self):
         """
@@ -80,18 +97,52 @@ class StreamChangeCacheTests(unittest.TestCase):
 
         cache.entity_has_changed("user@foo.com", 2)
         cache.entity_has_changed("bar@baz.net", 3)
+        cache.entity_has_changed("anotheruser@foo.com", 3)
         cache.entity_has_changed("user@elsewhere.org", 4)
 
-        self.assertEqual(
-            cache.get_all_entities_changed(1),
-            ["user@foo.com", "bar@baz.net", "user@elsewhere.org"],
-        )
-        self.assertEqual(
-            cache.get_all_entities_changed(2), ["bar@baz.net", "user@elsewhere.org"]
-        )
+        r = cache.get_all_entities_changed(1)
+
+        # either of these are valid
+        ok1 = [
+            "user@foo.com",
+            "bar@baz.net",
+            "anotheruser@foo.com",
+            "user@elsewhere.org",
+        ]
+        ok2 = [
+            "user@foo.com",
+            "anotheruser@foo.com",
+            "bar@baz.net",
+            "user@elsewhere.org",
+        ]
+        self.assertTrue(r == ok1 or r == ok2)
+
+        r = cache.get_all_entities_changed(2)
+        self.assertTrue(r == ok1[1:] or r == ok2[1:])
+
         self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"])
         self.assertEqual(cache.get_all_entities_changed(0), None)
 
+        # ... later, things gest more updates
+        cache.entity_has_changed("user@foo.com", 5)
+        cache.entity_has_changed("bar@baz.net", 5)
+        cache.entity_has_changed("anotheruser@foo.com", 6)
+
+        ok1 = [
+            "user@elsewhere.org",
+            "user@foo.com",
+            "bar@baz.net",
+            "anotheruser@foo.com",
+        ]
+        ok2 = [
+            "user@elsewhere.org",
+            "bar@baz.net",
+            "user@foo.com",
+            "anotheruser@foo.com",
+        ]
+        r = cache.get_all_entities_changed(3)
+        self.assertTrue(r == ok1 or r == ok2)
+
     def test_has_any_entity_changed(self):
         """
         StreamChangeCache.has_any_entity_changed will return True if any
diff --git a/tox.ini b/tox.ini
index 42b2d74891..31011d7436 100644
--- a/tox.ini
+++ b/tox.ini
@@ -202,7 +202,9 @@ commands = mypy \
             synapse/spam_checker_api \
             synapse/storage/engines \
             synapse/storage/database.py \
-            synapse/streams
+            synapse/streams \
+            synapse/util/caches/stream_change_cache.py \
+            tests/util/test_stream_change_cache.py
 
 # To find all folders that pass mypy you run:
 #
-- 
cgit 1.4.1


From 7cb8b4bc67042a39bd1b0e05df46089a2fce1955 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Tue, 12 May 2020 03:45:23 +1000
Subject: Allow configuration of Synapse's cache without using synctl or
 environment variables (#6391)

---
 changelog.d/6391.feature                          |   1 +
 docs/sample_config.yaml                           |  43 +++++-
 synapse/api/auth.py                               |   4 +-
 synapse/app/homeserver.py                         |   5 +-
 synapse/config/cache.py                           | 164 ++++++++++++++++++++++
 synapse/config/database.py                        |   6 -
 synapse/config/homeserver.py                      |   2 +
 synapse/http/client.py                            |   6 +-
 synapse/metrics/_exposition.py                    |  12 +-
 synapse/push/bulk_push_rule_evaluator.py          |   4 +-
 synapse/push/push_rule_evaluator.py               |   4 +-
 synapse/replication/slave/storage/client_ips.py   |   3 +-
 synapse/state/__init__.py                         |   4 +-
 synapse/storage/data_stores/main/client_ips.py    |   3 +-
 synapse/storage/data_stores/main/events_worker.py |   5 +-
 synapse/storage/data_stores/state/store.py        |   6 +-
 synapse/util/caches/__init__.py                   | 144 ++++++++++---------
 synapse/util/caches/descriptors.py                |  36 ++++-
 synapse/util/caches/expiringcache.py              |  29 +++-
 synapse/util/caches/lrucache.py                   |  52 +++++--
 synapse/util/caches/response_cache.py             |   2 +-
 synapse/util/caches/stream_change_cache.py        |  33 ++++-
 synapse/util/caches/ttlcache.py                   |   2 +-
 tests/config/test_cache.py                        | 127 +++++++++++++++++
 tests/storage/test__base.py                       |   8 +-
 tests/storage/test_appservice.py                  |  10 +-
 tests/storage/test_base.py                        |   3 +-
 tests/test_metrics.py                             |  34 +++++
 tests/util/test_expiring_cache.py                 |   2 +-
 tests/util/test_lrucache.py                       |   6 +-
 tests/util/test_stream_change_cache.py            |   5 +-
 tests/utils.py                                    |   1 +
 32 files changed, 620 insertions(+), 146 deletions(-)
 create mode 100644 changelog.d/6391.feature
 create mode 100644 synapse/config/cache.py
 create mode 100644 tests/config/test_cache.py

(limited to 'tests/util')

diff --git a/changelog.d/6391.feature b/changelog.d/6391.feature
new file mode 100644
index 0000000000..f123426e23
--- /dev/null
+++ b/changelog.d/6391.feature
@@ -0,0 +1 @@
+Synapse's cache factor can now be configured in `homeserver.yaml` by the `caches.global_factor` setting. Additionally, `caches.per_cache_factors` controls the cache factors for individual caches.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 5abeaf519b..8a8415b9a2 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -603,6 +603,45 @@ acme:
 
 
+## Caching ##
+
+# Caching can be configured through the following options.
+#
+# A cache 'factor' is a multiplier that can be applied to each of
+# Synapse's caches in order to increase or decrease the maximum
+# number of entries that can be stored.
+
+# The number of events to cache in memory. Not affected by
+# caches.global_factor.
+#
+#event_cache_size: 10K
+
+caches:
+   # Controls the global cache factor, which is the default cache factor
+   # for all caches if a specific factor for that cache is not otherwise
+   # set.
+   #
+   # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+   # variable. Setting by environment variable takes priority over
+   # setting through the config file.
+   #
+   # Defaults to 0.5, which will half the size of all caches.
+   #
+   #global_factor: 1.0
+
+   # A dictionary of cache name to cache factor for that individual
+   # cache. Overrides the global cache factor for a given cache.
+   #
+   # These can also be set through environment variables comprised
+   # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+   # letters and underscores. Setting by environment variable
+   # takes priority over setting through the config file.
+   # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+   #
+   per_cache_factors:
+     #get_users_who_share_room_with_user: 2.0
+
+
 ## Database ##
 
 # The 'database' setting defines the database that synapse uses to store all of
@@ -646,10 +685,6 @@ database:
   args:
     database: DATADIR/homeserver.db
 
-# Number of events to cache in memory.
-#
-#event_cache_size: 10K
-
 
 ## Logging ##
 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 1ad5ff9410..e009b1a760 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -37,7 +37,7 @@ from synapse.api.errors import (
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
 from synapse.types import StateMap, UserID
-from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
+from synapse.util.caches import register_cache
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.metrics import Measure
 
@@ -73,7 +73,7 @@ class Auth(object):
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
 
-        self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000)
+        self.token_cache = LruCache(10000)
         register_cache("cache", "token_cache", self.token_cache)
 
         self._auth_blocking = AuthBlocking(self.hs)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index bc8695d8dd..d7f337e586 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -69,7 +69,6 @@ from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.engines import IncorrectDatabaseSetup
 from synapse.storage.prepare_database import UpgradeDatabaseException
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.httpresourcetree import create_resource_tree
 from synapse.util.manhole import manhole
 from synapse.util.module_loader import load_module
@@ -516,8 +515,8 @@ def phone_stats_home(hs, stats, stats_process=_stats_process):
 
     daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
     stats["daily_sent_messages"] = daily_sent_messages
-    stats["cache_factor"] = CACHE_SIZE_FACTOR
-    stats["event_cache_size"] = hs.config.event_cache_size
+    stats["cache_factor"] = hs.config.caches.global_factor
+    stats["event_cache_size"] = hs.config.caches.event_cache_size
 
     #
     # Performance statistics
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
new file mode 100644
index 0000000000..91036a012e
--- /dev/null
+++ b/synapse/config/cache.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Callable, Dict
+
+from ._base import Config, ConfigError
+
+# The prefix for all cache factor-related environment variables
+_CACHES = {}
+_CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR"
+_DEFAULT_FACTOR_SIZE = 0.5
+_DEFAULT_EVENT_CACHE_SIZE = "10K"
+
+
+class CacheProperties(object):
+    def __init__(self):
+        # The default factor size for all caches
+        self.default_factor_size = float(
+            os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
+        )
+        self.resize_all_caches_func = None
+
+
+properties = CacheProperties()
+
+
+def add_resizable_cache(cache_name: str, cache_resize_callback: Callable):
+    """Register a cache that's size can dynamically change
+
+    Args:
+        cache_name: A reference to the cache
+        cache_resize_callback: A callback function that will be ran whenever
+            the cache needs to be resized
+    """
+    _CACHES[cache_name.lower()] = cache_resize_callback
+
+    # Ensure all loaded caches are sized appropriately
+    #
+    # This method should only run once the config has been read,
+    # as it uses values read from it
+    if properties.resize_all_caches_func:
+        properties.resize_all_caches_func()
+
+
+class CacheConfig(Config):
+    section = "caches"
+    _environ = os.environ
+
+    @staticmethod
+    def reset():
+        """Resets the caches to their defaults. Used for tests."""
+        properties.default_factor_size = float(
+            os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
+        )
+        properties.resize_all_caches_func = None
+        _CACHES.clear()
+
+    def generate_config_section(self, **kwargs):
+        return """\
+        ## Caching ##
+
+        # Caching can be configured through the following options.
+        #
+        # A cache 'factor' is a multiplier that can be applied to each of
+        # Synapse's caches in order to increase or decrease the maximum
+        # number of entries that can be stored.
+
+        # The number of events to cache in memory. Not affected by
+        # caches.global_factor.
+        #
+        #event_cache_size: 10K
+
+        caches:
+           # Controls the global cache factor, which is the default cache factor
+           # for all caches if a specific factor for that cache is not otherwise
+           # set.
+           #
+           # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+           # variable. Setting by environment variable takes priority over
+           # setting through the config file.
+           #
+           # Defaults to 0.5, which will half the size of all caches.
+           #
+           #global_factor: 1.0
+
+           # A dictionary of cache name to cache factor for that individual
+           # cache. Overrides the global cache factor for a given cache.
+           #
+           # These can also be set through environment variables comprised
+           # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+           # letters and underscores. Setting by environment variable
+           # takes priority over setting through the config file.
+           # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+           #
+           per_cache_factors:
+             #get_users_who_share_room_with_user: 2.0
+        """
+
+    def read_config(self, config, **kwargs):
+        self.event_cache_size = self.parse_size(
+            config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
+        )
+        self.cache_factors = {}  # type: Dict[str, float]
+
+        cache_config = config.get("caches") or {}
+        self.global_factor = cache_config.get(
+            "global_factor", properties.default_factor_size
+        )
+        if not isinstance(self.global_factor, (int, float)):
+            raise ConfigError("caches.global_factor must be a number.")
+
+        # Set the global one so that it's reflected in new caches
+        properties.default_factor_size = self.global_factor
+
+        # Load cache factors from the config
+        individual_factors = cache_config.get("per_cache_factors") or {}
+        if not isinstance(individual_factors, dict):
+            raise ConfigError("caches.per_cache_factors must be a dictionary")
+
+        # Override factors from environment if necessary
+        individual_factors.update(
+            {
+                key[len(_CACHE_PREFIX) + 1 :].lower(): float(val)
+                for key, val in self._environ.items()
+                if key.startswith(_CACHE_PREFIX + "_")
+            }
+        )
+
+        for cache, factor in individual_factors.items():
+            if not isinstance(factor, (int, float)):
+                raise ConfigError(
+                    "caches.per_cache_factors.%s must be a number" % (cache.lower(),)
+                )
+            self.cache_factors[cache.lower()] = factor
+
+        # Resize all caches (if necessary) with the new factors we've loaded
+        self.resize_all_caches()
+
+        # Store this function so that it can be called from other classes without
+        # needing an instance of Config
+        properties.resize_all_caches_func = self.resize_all_caches
+
+    def resize_all_caches(self):
+        """Ensure all cache sizes are up to date
+
+        For each cache, run the mapped callback function with either
+        a specific cache factor or the default, global one.
+        """
+        for cache_name, callback in _CACHES.items():
+            new_factor = self.cache_factors.get(cache_name, self.global_factor)
+            callback(new_factor)
diff --git a/synapse/config/database.py b/synapse/config/database.py
index 5b662d1b01..1064c2697b 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -68,10 +68,6 @@ database:
   name: sqlite3
   args:
     database: %(database_path)s
-
-# Number of events to cache in memory.
-#
-#event_cache_size: 10K
 """
 
 
@@ -116,8 +112,6 @@ class DatabaseConfig(Config):
         self.databases = []
 
     def read_config(self, config, **kwargs):
-        self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K"))
-
         # We *experimentally* support specifying multiple databases via the
         # `databases` key. This is a map from a label to database config in the
         # same format as the `database` config option, plus an extra
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 996d3e6bf7..2c7b3a699f 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -17,6 +17,7 @@
 from ._base import RootConfig
 from .api import ApiConfig
 from .appservice import AppServiceConfig
+from .cache import CacheConfig
 from .captcha import CaptchaConfig
 from .cas import CasConfig
 from .consent_config import ConsentConfig
@@ -55,6 +56,7 @@ class HomeServerConfig(RootConfig):
     config_classes = [
         ServerConfig,
         TlsConfig,
+        CacheConfig,
         DatabaseConfig,
         LoggingConfig,
         RatelimitConfig,
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 58eb47c69c..3cef747a4d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -49,7 +49,6 @@ from synapse.http.proxyagent import ProxyAgent
 from synapse.logging.context import make_deferred_yieldable
 from synapse.logging.opentracing import set_tag, start_active_span, tags
 from synapse.util.async_helpers import timeout_deferred
-from synapse.util.caches import CACHE_SIZE_FACTOR
 
 logger = logging.getLogger(__name__)
 
@@ -241,7 +240,10 @@ class SimpleHttpClient(object):
         # tends to do so in batches, so we need to allow the pool to keep
         # lots of idle connections around.
         pool = HTTPConnectionPool(self.reactor)
-        pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5))
+        # XXX: The justification for using the cache factor here is that larger instances
+        # will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
         pool.cachedConnectionTimeout = 2 * 60
 
         self.agent = ProxyAgent(
diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py
index a248103191..ab7f948ed4 100644
--- a/synapse/metrics/_exposition.py
+++ b/synapse/metrics/_exposition.py
@@ -33,6 +33,8 @@ from prometheus_client import REGISTRY
 
 from twisted.web.resource import Resource
 
+from synapse.util import caches
+
 try:
     from prometheus_client.samples import Sample
 except ImportError:
@@ -103,13 +105,15 @@ def nameify_sample(sample):
 
 
 def generate_latest(registry, emit_help=False):
-    output = []
 
-    for metric in registry.collect():
+    # Trigger the cache metrics to be rescraped, which updates the common
+    # metrics but do not produce metrics themselves
+    for collector in caches.collectors_by_name.values():
+        collector.collect()
 
-        if metric.name.startswith("__unused"):
-            continue
+    output = []
 
+    for metric in registry.collect():
         if not metric.samples:
             # No samples, don't bother.
             continue
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 433ca2f416..e75d964ac8 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -51,6 +51,7 @@ push_rules_delta_state_cache_metric = register_cache(
     "cache",
     "push_rules_delta_state_cache_metric",
     cache=[],  # Meaningless size, as this isn't a cache that stores values
+    resizable=False,
 )
 
 
@@ -67,7 +68,8 @@ class BulkPushRuleEvaluator(object):
         self.room_push_rule_cache_metrics = register_cache(
             "cache",
             "room_push_rule_cache",
-            cache=[],  # Meaningless size, as this isn't a cache that stores values
+            cache=[],  # Meaningless size, as this isn't a cache that stores values,
+            resizable=False,
         )
 
     @defer.inlineCallbacks
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 4cd702b5fa..11032491af 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -22,7 +22,7 @@ from six import string_types
 
 from synapse.events import EventBase
 from synapse.types import UserID
-from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
+from synapse.util.caches import register_cache
 from synapse.util.caches.lrucache import LruCache
 
 logger = logging.getLogger(__name__)
@@ -165,7 +165,7 @@ class PushRuleEvaluatorForEvent(object):
 
 
 # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
-regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
+regex_cache = LruCache(50000)
 register_cache("cache", "regex_push_cache", regex_cache)
 
 
diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py
index fbf996e33a..1a38f53dfb 100644
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@@ -15,7 +15,6 @@
 
 from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY
 from synapse.storage.database import Database
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.caches.descriptors import Cache
 
 from ._base import BaseSlavedStore
@@ -26,7 +25,7 @@ class SlavedClientIpStore(BaseSlavedStore):
         super(SlavedClientIpStore, self).__init__(database, db_conn, hs)
 
         self.client_ip_last_seen = Cache(
-            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
+            name="client_ip_last_seen", keylen=4, max_entries=50000
         )
 
     def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 4afefc6b1d..2fa529fcd0 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -35,7 +35,6 @@ from synapse.state import v1, v2
 from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
 from synapse.types import StateMap
 from synapse.util.async_helpers import Linearizer
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.metrics import Measure, measure_func
 
@@ -53,7 +52,6 @@ state_groups_histogram = Histogram(
 KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
 
 
-SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache")
 EVICTION_TIMEOUT_SECONDS = 60 * 60
 
 
@@ -447,7 +445,7 @@ class StateResolutionHandler(object):
         self._state_cache = ExpiringCache(
             cache_name="state_cache",
             clock=self.clock,
-            max_len=SIZE_OF_CACHE,
+            max_len=100000,
             expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
             iterable=True,
             reset_expiry_on_get=True,
diff --git a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py
index 92bc06919b..71f8d43a76 100644
--- a/synapse/storage/data_stores/main/client_ips.py
+++ b/synapse/storage/data_stores/main/client_ips.py
@@ -22,7 +22,6 @@ from twisted.internet import defer
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import Database, make_tuple_comparison_clause
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.caches.descriptors import Cache
 
 logger = logging.getLogger(__name__)
@@ -361,7 +360,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore):
     def __init__(self, database: Database, db_conn, hs):
 
         self.client_ip_last_seen = Cache(
-            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
+            name="client_ip_last_seen", keylen=4, max_entries=50000
         )
 
         super(ClientIpStore, self).__init__(database, db_conn, hs)
diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py
index 73df6b33ba..b8c1bbdf99 100644
--- a/synapse/storage/data_stores/main/events_worker.py
+++ b/synapse/storage/data_stores/main/events_worker.py
@@ -75,7 +75,10 @@ class EventsWorkerStore(SQLBaseStore):
         super(EventsWorkerStore, self).__init__(database, db_conn, hs)
 
         self._get_event_cache = Cache(
-            "*getEvent*", keylen=3, max_entries=hs.config.event_cache_size
+            "*getEvent*",
+            keylen=3,
+            max_entries=hs.config.caches.event_cache_size,
+            apply_cache_factor_from_config=False,
         )
 
         self._event_fetch_lock = threading.Condition()
diff --git a/synapse/storage/data_stores/state/store.py b/synapse/storage/data_stores/state/store.py
index 57a5267663..f3ad1e4369 100644
--- a/synapse/storage/data_stores/state/store.py
+++ b/synapse/storage/data_stores/state/store.py
@@ -28,7 +28,6 @@ from synapse.storage.data_stores.state.bg_updates import StateBackgroundUpdateSt
 from synapse.storage.database import Database
 from synapse.storage.state import StateFilter
 from synapse.types import StateMap
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.dictionary_cache import DictionaryCache
 
@@ -90,11 +89,10 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
         self._state_group_cache = DictionaryCache(
             "*stateGroupCache*",
             # TODO: this hasn't been tuned yet
-            50000 * get_cache_factor_for("stateGroupCache"),
+            50000,
         )
         self._state_group_members_cache = DictionaryCache(
-            "*stateGroupMembersCache*",
-            500000 * get_cache_factor_for("stateGroupMembersCache"),
+            "*stateGroupMembersCache*", 500000,
         )
 
     @cached(max_entries=10000, iterable=True)
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index da5077b471..4b8a0c7a8f 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
+# Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,27 +15,17 @@
 # limitations under the License.
 
 import logging
-import os
-from typing import Dict
+from typing import Callable, Dict, Optional
 
 import six
 from six.moves import intern
 
-from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily
-
-logger = logging.getLogger(__name__)
-
-CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
+import attr
+from prometheus_client.core import Gauge
 
+from synapse.config.cache import add_resizable_cache
 
-def get_cache_factor_for(cache_name):
-    env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper()
-    factor = os.environ.get(env_var)
-    if factor:
-        return float(factor)
-
-    return CACHE_SIZE_FACTOR
-
+logger = logging.getLogger(__name__)
 
 caches_by_name = {}
 collectors_by_name = {}  # type: Dict
@@ -44,6 +34,7 @@ cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
 cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
 cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
+cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
 
 response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
 response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
@@ -53,67 +44,82 @@ response_cache_evicted = Gauge(
 response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])
 
 
-def register_cache(cache_type, cache_name, cache, collect_callback=None):
-    """Register a cache object for metric collection.
+@attr.s
+class CacheMetric(object):
+
+    _cache = attr.ib()
+    _cache_type = attr.ib(type=str)
+    _cache_name = attr.ib(type=str)
+    _collect_callback = attr.ib(type=Optional[Callable])
+
+    hits = attr.ib(default=0)
+    misses = attr.ib(default=0)
+    evicted_size = attr.ib(default=0)
+
+    def inc_hits(self):
+        self.hits += 1
+
+    def inc_misses(self):
+        self.misses += 1
+
+    def inc_evictions(self, size=1):
+        self.evicted_size += size
+
+    def describe(self):
+        return []
+
+    def collect(self):
+        try:
+            if self._cache_type == "response_cache":
+                response_cache_size.labels(self._cache_name).set(len(self._cache))
+                response_cache_hits.labels(self._cache_name).set(self.hits)
+                response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                response_cache_total.labels(self._cache_name).set(
+                    self.hits + self.misses
+                )
+            else:
+                cache_size.labels(self._cache_name).set(len(self._cache))
+                cache_hits.labels(self._cache_name).set(self.hits)
+                cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                cache_total.labels(self._cache_name).set(self.hits + self.misses)
+                if getattr(self._cache, "max_size", None):
+                    cache_max_size.labels(self._cache_name).set(self._cache.max_size)
+            if self._collect_callback:
+                self._collect_callback()
+        except Exception as e:
+            logger.warning("Error calculating metrics for %s: %s", self._cache_name, e)
+            raise
+
+
+def register_cache(
+    cache_type: str,
+    cache_name: str,
+    cache,
+    collect_callback: Optional[Callable] = None,
+    resizable: bool = True,
+    resize_callback: Optional[Callable] = None,
+) -> CacheMetric:
+    """Register a cache object for metric collection and resizing.
 
     Args:
-        cache_type (str):
-        cache_name (str): name of the cache
-        cache (object): cache itself
-        collect_callback (callable|None): if not None, a function which is called during
-            metric collection to update additional metrics.
+        cache_type
+        cache_name: name of the cache
+        cache: cache itself
+        collect_callback: If given, a function which is called during metric
+            collection to update additional metrics.
+        resizable: Whether this cache supports being resized.
+        resize_callback: A function which can be called to resize the cache.
 
     Returns:
         CacheMetric: an object which provides inc_{hits,misses,evictions} methods
     """
+    if resizable:
+        if not resize_callback:
+            resize_callback = getattr(cache, "set_cache_factor")
+        add_resizable_cache(cache_name, resize_callback)
 
-    # Check if the metric is already registered. Unregister it, if so.
-    # This usually happens during tests, as at runtime these caches are
-    # effectively singletons.
+    metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
     metric_name = "cache_%s_%s" % (cache_type, cache_name)
-    if metric_name in collectors_by_name.keys():
-        REGISTRY.unregister(collectors_by_name[metric_name])
-
-    class CacheMetric(object):
-
-        hits = 0
-        misses = 0
-        evicted_size = 0
-
-        def inc_hits(self):
-            self.hits += 1
-
-        def inc_misses(self):
-            self.misses += 1
-
-        def inc_evictions(self, size=1):
-            self.evicted_size += size
-
-        def describe(self):
-            return []
-
-        def collect(self):
-            try:
-                if cache_type == "response_cache":
-                    response_cache_size.labels(cache_name).set(len(cache))
-                    response_cache_hits.labels(cache_name).set(self.hits)
-                    response_cache_evicted.labels(cache_name).set(self.evicted_size)
-                    response_cache_total.labels(cache_name).set(self.hits + self.misses)
-                else:
-                    cache_size.labels(cache_name).set(len(cache))
-                    cache_hits.labels(cache_name).set(self.hits)
-                    cache_evicted.labels(cache_name).set(self.evicted_size)
-                    cache_total.labels(cache_name).set(self.hits + self.misses)
-                if collect_callback:
-                    collect_callback()
-            except Exception as e:
-                logger.warning("Error calculating metrics for %s: %s", cache_name, e)
-                raise
-
-            yield GaugeMetricFamily("__unused", "")
-
-    metric = CacheMetric()
-    REGISTRY.register(metric)
     caches_by_name[cache_name] = cache
     collectors_by_name[metric_name] = metric
     return metric
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 2e8f6543e5..cd48262420 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import functools
 import inspect
 import logging
@@ -30,7 +31,6 @@ from twisted.internet import defer
 from synapse.logging.context import make_deferred_yieldable, preserve_fn
 from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 
@@ -81,7 +81,6 @@ class CacheEntry(object):
 class Cache(object):
     __slots__ = (
         "cache",
-        "max_entries",
         "name",
         "keylen",
         "thread",
@@ -89,7 +88,29 @@ class Cache(object):
         "_pending_deferred_cache",
     )
 
-    def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False):
+    def __init__(
+        self,
+        name: str,
+        max_entries: int = 1000,
+        keylen: int = 1,
+        tree: bool = False,
+        iterable: bool = False,
+        apply_cache_factor_from_config: bool = True,
+    ):
+        """
+        Args:
+            name: The name of the cache
+            max_entries: Maximum amount of entries that the cache will hold
+            keylen: The length of the tuple used as the cache key
+            tree: Use a TreeCache instead of a dict as the underlying cache type
+            iterable: If True, count each item in the cached object as an entry,
+                rather than each cached object
+            apply_cache_factor_from_config: Whether cache factors specified in the
+                config file affect `max_entries`
+
+        Returns:
+            Cache
+        """
         cache_type = TreeCache if tree else dict
         self._pending_deferred_cache = cache_type()
 
@@ -99,6 +120,7 @@ class Cache(object):
             cache_type=cache_type,
             size_callback=(lambda d: len(d)) if iterable else None,
             evicted_callback=self._on_evicted,
+            apply_cache_factor_from_config=apply_cache_factor_from_config,
         )
 
         self.name = name
@@ -111,6 +133,10 @@ class Cache(object):
             collect_callback=self._metrics_collection_callback,
         )
 
+    @property
+    def max_entries(self):
+        return self.cache.max_size
+
     def _on_evicted(self, evicted_count):
         self.metrics.inc_evictions(evicted_count)
 
@@ -370,13 +396,11 @@ class CacheDescriptor(_CacheDescriptorBase):
             cache_context=cache_context,
         )
 
-        max_entries = int(max_entries * get_cache_factor_for(orig.__name__))
-
         self.max_entries = max_entries
         self.tree = tree
         self.iterable = iterable
 
-    def __get__(self, obj, objtype=None):
+    def __get__(self, obj, owner):
         cache = Cache(
             name=self.orig.__name__,
             max_entries=self.max_entries,
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index cddf1ed515..2726b67b6d 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -18,6 +18,7 @@ from collections import OrderedDict
 
 from six import iteritems, itervalues
 
+from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util.caches import register_cache
 
@@ -51,15 +52,16 @@ class ExpiringCache(object):
                 an item on access. Defaults to False.
             iterable (bool): If true, the size is calculated by summing the
                 sizes of all entries, rather than the number of entries.
-
         """
         self._cache_name = cache_name
 
+        self._original_max_size = max_len
+
+        self._max_size = int(max_len * cache_config.properties.default_factor_size)
+
         self._clock = clock
 
-        self._max_len = max_len
         self._expiry_ms = expiry_ms
-
         self._reset_expiry_on_get = reset_expiry_on_get
 
         self._cache = OrderedDict()
@@ -82,9 +84,11 @@ class ExpiringCache(object):
     def __setitem__(self, key, value):
         now = self._clock.time_msec()
         self._cache[key] = _CacheEntry(now, value)
+        self.evict()
 
+    def evict(self):
         # Evict if there are now too many items
-        while self._max_len and len(self) > self._max_len:
+        while self._max_size and len(self) > self._max_size:
             _key, value = self._cache.popitem(last=False)
             if self.iterable:
                 self.metrics.inc_evictions(len(value.value))
@@ -170,6 +174,23 @@ class ExpiringCache(object):
         else:
             return len(self._cache)
 
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = int(self._original_max_size * factor)
+        if new_size != self._max_size:
+            self._max_size = new_size
+            self.evict()
+            return True
+        return False
+
 
 class _CacheEntry(object):
     __slots__ = ["time", "value"]
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 1536cb64f3..29fabac3cd 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import threading
 from functools import wraps
+from typing import Callable, Optional, Type, Union
 
+from synapse.config import cache as cache_config
 from synapse.util.caches.treecache import TreeCache
 
 
@@ -52,17 +53,18 @@ class LruCache(object):
 
     def __init__(
         self,
-        max_size,
-        keylen=1,
-        cache_type=dict,
-        size_callback=None,
-        evicted_callback=None,
+        max_size: int,
+        keylen: int = 1,
+        cache_type: Type[Union[dict, TreeCache]] = dict,
+        size_callback: Optional[Callable] = None,
+        evicted_callback: Optional[Callable] = None,
+        apply_cache_factor_from_config: bool = True,
     ):
         """
         Args:
-            max_size (int):
+            max_size: The maximum amount of entries the cache can hold
 
-            keylen (int):
+            keylen: The length of the tuple used as the cache key
 
             cache_type (type):
                 type of underlying cache to be used. Typically one of dict
@@ -73,9 +75,23 @@ class LruCache(object):
             evicted_callback (func(int)|None):
                 if not None, called on eviction with the size of the evicted
                 entry
+
+            apply_cache_factor_from_config (bool): If true, `max_size` will be
+                multiplied by a cache factor derived from the homeserver config
         """
         cache = cache_type()
         self.cache = cache  # Used for introspection.
+
+        # Save the original max size, and apply the default size factor.
+        self._original_max_size = max_size
+        # We previously didn't apply the cache factor here, and as such some caches were
+        # not affected by the global cache factor. Add an option here to disable applying
+        # the cache factor when a cache is created
+        if apply_cache_factor_from_config:
+            self.max_size = int(max_size * cache_config.properties.default_factor_size)
+        else:
+            self.max_size = int(max_size)
+
         list_root = _Node(None, None, None, None)
         list_root.next_node = list_root
         list_root.prev_node = list_root
@@ -83,7 +99,7 @@ class LruCache(object):
         lock = threading.Lock()
 
         def evict():
-            while cache_len() > max_size:
+            while cache_len() > self.max_size:
                 todelete = list_root.prev_node
                 evicted_len = delete_node(todelete)
                 cache.pop(todelete.key, None)
@@ -236,6 +252,7 @@ class LruCache(object):
             return key in cache
 
         self.sentinel = object()
+        self._on_resize = evict
         self.get = cache_get
         self.set = cache_set
         self.setdefault = cache_set_default
@@ -266,3 +283,20 @@ class LruCache(object):
 
     def __contains__(self, key):
         return self.contains(key)
+
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = int(self._original_max_size * factor)
+        if new_size != self.max_size:
+            self.max_size = new_size
+            self._on_resize()
+            return True
+        return False
diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py
index b68f9fe0d4..a6c60888e5 100644
--- a/synapse/util/caches/response_cache.py
+++ b/synapse/util/caches/response_cache.py
@@ -38,7 +38,7 @@ class ResponseCache(object):
         self.timeout_sec = timeout_ms / 1000.0
 
         self._name = name
-        self._metrics = register_cache("response_cache", name, self)
+        self._metrics = register_cache("response_cache", name, self, resizable=False)
 
     def size(self):
         return len(self.pending_result_cache)
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index e54f80d76e..2a161bf244 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+import math
 from typing import Dict, FrozenSet, List, Mapping, Optional, Set, Union
 
 from six import integer_types
@@ -46,7 +47,8 @@ class StreamChangeCache:
         max_size=10000,
         prefilled_cache: Optional[Mapping[EntityType, int]] = None,
     ):
-        self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR)
+        self._original_max_size = max_size
+        self._max_size = math.floor(max_size)
         self._entity_to_key = {}  # type: Dict[EntityType, int]
 
         # map from stream id to the a set of entities which changed at that stream id.
@@ -58,12 +60,31 @@ class StreamChangeCache:
         #
         self._earliest_known_stream_pos = current_stream_pos
         self.name = name
-        self.metrics = caches.register_cache("cache", self.name, self._cache)
+        self.metrics = caches.register_cache(
+            "cache", self.name, self._cache, resize_callback=self.set_cache_factor
+        )
 
         if prefilled_cache:
             for entity, stream_pos in prefilled_cache.items():
                 self.entity_has_changed(entity, stream_pos)
 
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = math.floor(self._original_max_size * factor)
+        if new_size != self._max_size:
+            self.max_size = new_size
+            self._evict()
+            return True
+        return False
+
     def has_entity_changed(self, entity: EntityType, stream_pos: int) -> bool:
         """Returns True if the entity may have been updated since stream_pos
         """
@@ -171,6 +192,7 @@ class StreamChangeCache:
             e1 = self._cache[stream_pos] = set()
         e1.add(entity)
         self._entity_to_key[entity] = stream_pos
+        self._evict()
 
         # if the cache is too big, remove entries
         while len(self._cache) > self._max_size:
@@ -179,6 +201,13 @@ class StreamChangeCache:
             for entity in r:
                 del self._entity_to_key[entity]
 
+    def _evict(self):
+        while len(self._cache) > self._max_size:
+            k, r = self._cache.popitem(0)
+            self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos)
+            for entity in r:
+                self._entity_to_key.pop(entity, None)
+
     def get_max_pos_of_last_change(self, entity: EntityType) -> int:
 
         """Returns an upper bound of the stream id of the last change to an
diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py
index 99646c7cf0..6437aa907e 100644
--- a/synapse/util/caches/ttlcache.py
+++ b/synapse/util/caches/ttlcache.py
@@ -38,7 +38,7 @@ class TTLCache(object):
 
         self._timer = timer
 
-        self._metrics = register_cache("ttl", cache_name, self)
+        self._metrics = register_cache("ttl", cache_name, self, resizable=False)
 
     def set(self, key, value, ttl):
         """Add/update an entry in the cache
diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py
new file mode 100644
index 0000000000..2920279125
--- /dev/null
+++ b/tests/config/test_cache.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.config._base import Config, RootConfig
+from synapse.config.cache import CacheConfig, add_resizable_cache
+from synapse.util.caches.lrucache import LruCache
+
+from tests.unittest import TestCase
+
+
+class FakeServer(Config):
+    section = "server"
+
+
+class TestConfig(RootConfig):
+    config_classes = [FakeServer, CacheConfig]
+
+
+class CacheConfigTests(TestCase):
+    def setUp(self):
+        # Reset caches before each test
+        TestConfig().caches.reset()
+
+    def test_individual_caches_from_environ(self):
+        """
+        Individual cache factors will be loaded from the environment.
+        """
+        config = {}
+        t = TestConfig()
+        t.caches._environ = {
+            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
+            "SYNAPSE_NOT_CACHE": "BLAH",
+        }
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(dict(t.caches.cache_factors), {"something_or_other": 2.0})
+
+    def test_config_overrides_environ(self):
+        """
+        Individual cache factors defined in the environment will take precedence
+        over those in the config.
+        """
+        config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}}
+        t = TestConfig()
+        t.caches._environ = {
+            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
+            "SYNAPSE_CACHE_FACTOR_FOO": 1,
+        }
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(
+            dict(t.caches.cache_factors),
+            {"foo": 1.0, "bar": 3.0, "something_or_other": 2.0},
+        )
+
+    def test_individual_instantiated_before_config_load(self):
+        """
+        If a cache is instantiated before the config is read, it will be given
+        the default cache size in the interim, and then resized once the config
+        is loaded.
+        """
+        cache = LruCache(100)
+
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 50)
+
+        config = {"caches": {"per_cache_factors": {"foo": 3}}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(cache.max_size, 300)
+
+    def test_individual_instantiated_after_config_load(self):
+        """
+        If a cache is instantiated after the config is read, it will be
+        immediately resized to the correct size given the per_cache_factor if
+        there is one.
+        """
+        config = {"caches": {"per_cache_factors": {"foo": 2}}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 200)
+
+    def test_global_instantiated_before_config_load(self):
+        """
+        If a cache is instantiated before the config is read, it will be given
+        the default cache size in the interim, and then resized to the new
+        default cache size once the config is loaded.
+        """
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 50)
+
+        config = {"caches": {"global_factor": 4}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(cache.max_size, 400)
+
+    def test_global_instantiated_after_config_load(self):
+        """
+        If a cache is instantiated after the config is read, it will be
+        immediately resized to the correct size given the global factor if there
+        is no per-cache factor.
+        """
+        config = {"caches": {"global_factor": 1.5}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 150)
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index e37260a820..5a50e4fdd4 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -25,8 +25,8 @@ from synapse.util.caches.descriptors import Cache, cached
 from tests import unittest
 
 
-class CacheTestCase(unittest.TestCase):
-    def setUp(self):
+class CacheTestCase(unittest.HomeserverTestCase):
+    def prepare(self, reactor, clock, homeserver):
         self.cache = Cache("test")
 
     def test_empty(self):
@@ -96,7 +96,7 @@ class CacheTestCase(unittest.TestCase):
         cache.get(3)
 
 
-class CacheDecoratorTestCase(unittest.TestCase):
+class CacheDecoratorTestCase(unittest.HomeserverTestCase):
     @defer.inlineCallbacks
     def test_passthrough(self):
         class A(object):
@@ -239,7 +239,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
         callcount2 = [0]
 
         class A(object):
-            @cached(max_entries=4)  # HACK: This makes it 2 due to cache factor
+            @cached(max_entries=2)
             def func(self, key):
                 callcount[0] += 1
                 return key
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 31710949a8..ef296e7dab 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -43,7 +43,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = self.as_yaml_files
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         self.as_token = "token1"
@@ -110,7 +110,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = self.as_yaml_files
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         self.as_list = [
@@ -422,7 +422,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         database = hs.get_datastores().databases[0]
@@ -440,7 +440,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         with self.assertRaises(ConfigError) as cm:
@@ -464,7 +464,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         with self.assertRaises(ConfigError) as cm:
diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py
index cdee0a9e60..278961c331 100644
--- a/tests/storage/test_base.py
+++ b/tests/storage/test_base.py
@@ -51,7 +51,8 @@ class SQLBaseStoreTestCase(unittest.TestCase):
 
         config = Mock()
         config._disable_native_upserts = True
-        config.event_cache_size = 1
+        config.caches = Mock()
+        config.caches.event_cache_size = 1
         hs = TestHomeServer("test", config=config)
 
         sqlite_config = {"name": "sqlite3"}
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 270f853d60..f5f63d8ed6 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
+from synapse.util.caches.descriptors import Cache
 
 from tests import unittest
 
@@ -129,3 +130,36 @@ class BuildInfoTests(unittest.TestCase):
         self.assertTrue(b"osversion=" in items[0])
         self.assertTrue(b"pythonversion=" in items[0])
         self.assertTrue(b"version=" in items[0])
+
+
+class CacheMetricsTests(unittest.HomeserverTestCase):
+    def test_cache_metric(self):
+        """
+        Caches produce metrics reflecting their state when scraped.
+        """
+        CACHE_NAME = "cache_metrics_test_fgjkbdfg"
+        cache = Cache(CACHE_NAME, max_entries=777)
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "0.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
+
+        cache.prefill("1", "hi")
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "1.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
diff --git a/tests/util/test_expiring_cache.py b/tests/util/test_expiring_cache.py
index 50bc7702d2..49ffeebd0e 100644
--- a/tests/util/test_expiring_cache.py
+++ b/tests/util/test_expiring_cache.py
@@ -21,7 +21,7 @@ from tests.utils import MockClock
 from .. import unittest
 
 
-class ExpiringCacheTestCase(unittest.TestCase):
+class ExpiringCacheTestCase(unittest.HomeserverTestCase):
     def test_get_set(self):
         clock = MockClock()
         cache = ExpiringCache("test", clock, max_len=1)
diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py
index 786947375d..0adb2174af 100644
--- a/tests/util/test_lrucache.py
+++ b/tests/util/test_lrucache.py
@@ -22,7 +22,7 @@ from synapse.util.caches.treecache import TreeCache
 from .. import unittest
 
 
-class LruCacheTestCase(unittest.TestCase):
+class LruCacheTestCase(unittest.HomeserverTestCase):
     def test_get_set(self):
         cache = LruCache(1)
         cache["key"] = "value"
@@ -84,7 +84,7 @@ class LruCacheTestCase(unittest.TestCase):
         self.assertEquals(len(cache), 0)
 
 
-class LruCacheCallbacksTestCase(unittest.TestCase):
+class LruCacheCallbacksTestCase(unittest.HomeserverTestCase):
     def test_get(self):
         m = Mock()
         cache = LruCache(1)
@@ -233,7 +233,7 @@ class LruCacheCallbacksTestCase(unittest.TestCase):
         self.assertEquals(m3.call_count, 1)
 
 
-class LruCacheSizedTestCase(unittest.TestCase):
+class LruCacheSizedTestCase(unittest.HomeserverTestCase):
     def test_evict(self):
         cache = LruCache(5, size_callback=len)
         cache["key1"] = [0]
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index 6857933540..13b753e367 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -1,11 +1,9 @@
-from mock import patch
-
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 from tests import unittest
 
 
-class StreamChangeCacheTests(unittest.TestCase):
+class StreamChangeCacheTests(unittest.HomeserverTestCase):
     """
     Tests for StreamChangeCache.
     """
@@ -54,7 +52,6 @@ class StreamChangeCacheTests(unittest.TestCase):
         self.assertTrue(cache.has_entity_changed("user@foo.com", 0))
         self.assertTrue(cache.has_entity_changed("not@here.website", 0))
 
-    @patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0)
     def test_entity_has_changed_pops_off_start(self):
         """
         StreamChangeCache.entity_has_changed will respect the max size and
diff --git a/tests/utils.py b/tests/utils.py
index f9be62b499..59c020a051 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -167,6 +167,7 @@ def default_config(name, parse=False):
         # disable user directory updates, because they get done in the
         # background, which upsets the test runner.
         "update_user_directory": False,
+        "caches": {"global_factor": 1},
     }
 
     if parse:
-- 
cgit 1.4.1


From a72d5f39db55dfecb48291acdd6566c6556e0b0b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 27 May 2020 19:41:06 +0100
Subject: Add test for Linearizer.is_queued(..)

---
 tests/util/test_linearizer.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'tests/util')

diff --git a/tests/util/test_linearizer.py b/tests/util/test_linearizer.py
index 852ef23185..ca3858b184 100644
--- a/tests/util/test_linearizer.py
+++ b/tests/util/test_linearizer.py
@@ -45,6 +45,38 @@ class LinearizerTestCase(unittest.TestCase):
         with (yield d2):
             pass
 
+    @defer.inlineCallbacks
+    def test_linearizer_is_queued(self):
+        linearizer = Linearizer()
+
+        key = object()
+
+        d1 = linearizer.queue(key)
+        cm1 = yield d1
+
+        # Since d1 gets called immediately, "is_queued" should return false.
+        self.assertFalse(linearizer.is_queued(key))
+
+        d2 = linearizer.queue(key)
+        self.assertFalse(d2.called)
+
+        # Now d2 is queued up behind successful completion of cm1
+        self.assertTrue(linearizer.is_queued(key))
+
+        with cm1:
+            self.assertFalse(d2.called)
+
+            # cm1 still not done, so d2 still queued.
+            self.assertTrue(linearizer.is_queued(key))
+
+        # And now d2 is called and nothing is in the queue again
+        self.assertFalse(linearizer.is_queued(key))
+
+        with (yield d2):
+            self.assertFalse(linearizer.is_queued(key))
+
+        self.assertFalse(linearizer.is_queued(key))
+
     def test_lots_of_queued_things(self):
         # we have one slow thing, and lots of fast things queued up behind it.
         # it should *not* explode the stack.
-- 
cgit 1.4.1