From 616af44137c78d481024da83bb51ed0d50a49522 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quentingliech@gmail.com>
Date: Fri, 8 May 2020 14:30:40 +0200
Subject: Implement OpenID Connect-based login (#7256)

---
 synapse/app/homeserver.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'synapse/app')

diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index cbd1ea475a..bc8695d8dd 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -192,6 +192,11 @@ class SynapseHomeServer(HomeServer):
                 }
             )
 
+            if self.get_config().oidc_enabled:
+                from synapse.rest.oidc import OIDCResource
+
+                resources["/_synapse/oidc"] = OIDCResource(self)
+
             if self.get_config().saml2_enabled:
                 from synapse.rest.saml2 import SAML2Resource
 
@@ -422,6 +427,13 @@ def setup(config_options):
                 # Check if it needs to be reprovisioned every day.
                 hs.get_clock().looping_call(reprovision_acme, 24 * 60 * 60 * 1000)
 
+            # Load the OIDC provider metadatas, if OIDC is enabled.
+            if hs.config.oidc_enabled:
+                oidc = hs.get_oidc_handler()
+                # Loading the provider metadata also ensures the provider config is valid.
+                yield defer.ensureDeferred(oidc.load_metadata())
+                yield defer.ensureDeferred(oidc.load_jwks())
+
             _base.start(hs, config.listeners)
 
             hs.get_datastore().db.updates.start_doing_background_updates()
-- 
cgit 1.5.1


From 7cb8b4bc67042a39bd1b0e05df46089a2fce1955 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Tue, 12 May 2020 03:45:23 +1000
Subject: Allow configuration of Synapse's cache without using synctl or
 environment variables (#6391)

---
 changelog.d/6391.feature                          |   1 +
 docs/sample_config.yaml                           |  43 +++++-
 synapse/api/auth.py                               |   4 +-
 synapse/app/homeserver.py                         |   5 +-
 synapse/config/cache.py                           | 164 ++++++++++++++++++++++
 synapse/config/database.py                        |   6 -
 synapse/config/homeserver.py                      |   2 +
 synapse/http/client.py                            |   6 +-
 synapse/metrics/_exposition.py                    |  12 +-
 synapse/push/bulk_push_rule_evaluator.py          |   4 +-
 synapse/push/push_rule_evaluator.py               |   4 +-
 synapse/replication/slave/storage/client_ips.py   |   3 +-
 synapse/state/__init__.py                         |   4 +-
 synapse/storage/data_stores/main/client_ips.py    |   3 +-
 synapse/storage/data_stores/main/events_worker.py |   5 +-
 synapse/storage/data_stores/state/store.py        |   6 +-
 synapse/util/caches/__init__.py                   | 144 ++++++++++---------
 synapse/util/caches/descriptors.py                |  36 ++++-
 synapse/util/caches/expiringcache.py              |  29 +++-
 synapse/util/caches/lrucache.py                   |  52 +++++--
 synapse/util/caches/response_cache.py             |   2 +-
 synapse/util/caches/stream_change_cache.py        |  33 ++++-
 synapse/util/caches/ttlcache.py                   |   2 +-
 tests/config/test_cache.py                        | 127 +++++++++++++++++
 tests/storage/test__base.py                       |   8 +-
 tests/storage/test_appservice.py                  |  10 +-
 tests/storage/test_base.py                        |   3 +-
 tests/test_metrics.py                             |  34 +++++
 tests/util/test_expiring_cache.py                 |   2 +-
 tests/util/test_lrucache.py                       |   6 +-
 tests/util/test_stream_change_cache.py            |   5 +-
 tests/utils.py                                    |   1 +
 32 files changed, 620 insertions(+), 146 deletions(-)
 create mode 100644 changelog.d/6391.feature
 create mode 100644 synapse/config/cache.py
 create mode 100644 tests/config/test_cache.py

(limited to 'synapse/app')

diff --git a/changelog.d/6391.feature b/changelog.d/6391.feature
new file mode 100644
index 0000000000..f123426e23
--- /dev/null
+++ b/changelog.d/6391.feature
@@ -0,0 +1 @@
+Synapse's cache factor can now be configured in `homeserver.yaml` by the `caches.global_factor` setting. Additionally, `caches.per_cache_factors` controls the cache factors for individual caches.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 5abeaf519b..8a8415b9a2 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -603,6 +603,45 @@ acme:
 
 
+## Caching ##
+
+# Caching can be configured through the following options.
+#
+# A cache 'factor' is a multiplier that can be applied to each of
+# Synapse's caches in order to increase or decrease the maximum
+# number of entries that can be stored.
+
+# The number of events to cache in memory. Not affected by
+# caches.global_factor.
+#
+#event_cache_size: 10K
+
+caches:
+   # Controls the global cache factor, which is the default cache factor
+   # for all caches if a specific factor for that cache is not otherwise
+   # set.
+   #
+   # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+   # variable. Setting by environment variable takes priority over
+   # setting through the config file.
+   #
+   # Defaults to 0.5, which will half the size of all caches.
+   #
+   #global_factor: 1.0
+
+   # A dictionary of cache name to cache factor for that individual
+   # cache. Overrides the global cache factor for a given cache.
+   #
+   # These can also be set through environment variables comprised
+   # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+   # letters and underscores. Setting by environment variable
+   # takes priority over setting through the config file.
+   # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+   #
+   per_cache_factors:
+     #get_users_who_share_room_with_user: 2.0
+
+
 ## Database ##
 
 # The 'database' setting defines the database that synapse uses to store all of
@@ -646,10 +685,6 @@ database:
   args:
     database: DATADIR/homeserver.db
 
-# Number of events to cache in memory.
-#
-#event_cache_size: 10K
-
 
 ## Logging ##
 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 1ad5ff9410..e009b1a760 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -37,7 +37,7 @@ from synapse.api.errors import (
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
 from synapse.types import StateMap, UserID
-from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
+from synapse.util.caches import register_cache
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.metrics import Measure
 
@@ -73,7 +73,7 @@ class Auth(object):
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
 
-        self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000)
+        self.token_cache = LruCache(10000)
         register_cache("cache", "token_cache", self.token_cache)
 
         self._auth_blocking = AuthBlocking(self.hs)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index bc8695d8dd..d7f337e586 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -69,7 +69,6 @@ from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.engines import IncorrectDatabaseSetup
 from synapse.storage.prepare_database import UpgradeDatabaseException
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.httpresourcetree import create_resource_tree
 from synapse.util.manhole import manhole
 from synapse.util.module_loader import load_module
@@ -516,8 +515,8 @@ def phone_stats_home(hs, stats, stats_process=_stats_process):
 
     daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
     stats["daily_sent_messages"] = daily_sent_messages
-    stats["cache_factor"] = CACHE_SIZE_FACTOR
-    stats["event_cache_size"] = hs.config.event_cache_size
+    stats["cache_factor"] = hs.config.caches.global_factor
+    stats["event_cache_size"] = hs.config.caches.event_cache_size
 
     #
     # Performance statistics
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
new file mode 100644
index 0000000000..91036a012e
--- /dev/null
+++ b/synapse/config/cache.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Callable, Dict
+
+from ._base import Config, ConfigError
+
+# The prefix for all cache factor-related environment variables
+_CACHES = {}
+_CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR"
+_DEFAULT_FACTOR_SIZE = 0.5
+_DEFAULT_EVENT_CACHE_SIZE = "10K"
+
+
+class CacheProperties(object):
+    def __init__(self):
+        # The default factor size for all caches
+        self.default_factor_size = float(
+            os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
+        )
+        self.resize_all_caches_func = None
+
+
+properties = CacheProperties()
+
+
+def add_resizable_cache(cache_name: str, cache_resize_callback: Callable):
+    """Register a cache that's size can dynamically change
+
+    Args:
+        cache_name: A reference to the cache
+        cache_resize_callback: A callback function that will be ran whenever
+            the cache needs to be resized
+    """
+    _CACHES[cache_name.lower()] = cache_resize_callback
+
+    # Ensure all loaded caches are sized appropriately
+    #
+    # This method should only run once the config has been read,
+    # as it uses values read from it
+    if properties.resize_all_caches_func:
+        properties.resize_all_caches_func()
+
+
+class CacheConfig(Config):
+    section = "caches"
+    _environ = os.environ
+
+    @staticmethod
+    def reset():
+        """Resets the caches to their defaults. Used for tests."""
+        properties.default_factor_size = float(
+            os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
+        )
+        properties.resize_all_caches_func = None
+        _CACHES.clear()
+
+    def generate_config_section(self, **kwargs):
+        return """\
+        ## Caching ##
+
+        # Caching can be configured through the following options.
+        #
+        # A cache 'factor' is a multiplier that can be applied to each of
+        # Synapse's caches in order to increase or decrease the maximum
+        # number of entries that can be stored.
+
+        # The number of events to cache in memory. Not affected by
+        # caches.global_factor.
+        #
+        #event_cache_size: 10K
+
+        caches:
+           # Controls the global cache factor, which is the default cache factor
+           # for all caches if a specific factor for that cache is not otherwise
+           # set.
+           #
+           # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+           # variable. Setting by environment variable takes priority over
+           # setting through the config file.
+           #
+           # Defaults to 0.5, which will half the size of all caches.
+           #
+           #global_factor: 1.0
+
+           # A dictionary of cache name to cache factor for that individual
+           # cache. Overrides the global cache factor for a given cache.
+           #
+           # These can also be set through environment variables comprised
+           # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+           # letters and underscores. Setting by environment variable
+           # takes priority over setting through the config file.
+           # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+           #
+           per_cache_factors:
+             #get_users_who_share_room_with_user: 2.0
+        """
+
+    def read_config(self, config, **kwargs):
+        self.event_cache_size = self.parse_size(
+            config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
+        )
+        self.cache_factors = {}  # type: Dict[str, float]
+
+        cache_config = config.get("caches") or {}
+        self.global_factor = cache_config.get(
+            "global_factor", properties.default_factor_size
+        )
+        if not isinstance(self.global_factor, (int, float)):
+            raise ConfigError("caches.global_factor must be a number.")
+
+        # Set the global one so that it's reflected in new caches
+        properties.default_factor_size = self.global_factor
+
+        # Load cache factors from the config
+        individual_factors = cache_config.get("per_cache_factors") or {}
+        if not isinstance(individual_factors, dict):
+            raise ConfigError("caches.per_cache_factors must be a dictionary")
+
+        # Override factors from environment if necessary
+        individual_factors.update(
+            {
+                key[len(_CACHE_PREFIX) + 1 :].lower(): float(val)
+                for key, val in self._environ.items()
+                if key.startswith(_CACHE_PREFIX + "_")
+            }
+        )
+
+        for cache, factor in individual_factors.items():
+            if not isinstance(factor, (int, float)):
+                raise ConfigError(
+                    "caches.per_cache_factors.%s must be a number" % (cache.lower(),)
+                )
+            self.cache_factors[cache.lower()] = factor
+
+        # Resize all caches (if necessary) with the new factors we've loaded
+        self.resize_all_caches()
+
+        # Store this function so that it can be called from other classes without
+        # needing an instance of Config
+        properties.resize_all_caches_func = self.resize_all_caches
+
+    def resize_all_caches(self):
+        """Ensure all cache sizes are up to date
+
+        For each cache, run the mapped callback function with either
+        a specific cache factor or the default, global one.
+        """
+        for cache_name, callback in _CACHES.items():
+            new_factor = self.cache_factors.get(cache_name, self.global_factor)
+            callback(new_factor)
diff --git a/synapse/config/database.py b/synapse/config/database.py
index 5b662d1b01..1064c2697b 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -68,10 +68,6 @@ database:
   name: sqlite3
   args:
     database: %(database_path)s
-
-# Number of events to cache in memory.
-#
-#event_cache_size: 10K
 """
 
 
@@ -116,8 +112,6 @@ class DatabaseConfig(Config):
         self.databases = []
 
     def read_config(self, config, **kwargs):
-        self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K"))
-
         # We *experimentally* support specifying multiple databases via the
         # `databases` key. This is a map from a label to database config in the
         # same format as the `database` config option, plus an extra
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 996d3e6bf7..2c7b3a699f 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -17,6 +17,7 @@
 from ._base import RootConfig
 from .api import ApiConfig
 from .appservice import AppServiceConfig
+from .cache import CacheConfig
 from .captcha import CaptchaConfig
 from .cas import CasConfig
 from .consent_config import ConsentConfig
@@ -55,6 +56,7 @@ class HomeServerConfig(RootConfig):
     config_classes = [
         ServerConfig,
         TlsConfig,
+        CacheConfig,
         DatabaseConfig,
         LoggingConfig,
         RatelimitConfig,
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 58eb47c69c..3cef747a4d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -49,7 +49,6 @@ from synapse.http.proxyagent import ProxyAgent
 from synapse.logging.context import make_deferred_yieldable
 from synapse.logging.opentracing import set_tag, start_active_span, tags
 from synapse.util.async_helpers import timeout_deferred
-from synapse.util.caches import CACHE_SIZE_FACTOR
 
 logger = logging.getLogger(__name__)
 
@@ -241,7 +240,10 @@ class SimpleHttpClient(object):
         # tends to do so in batches, so we need to allow the pool to keep
         # lots of idle connections around.
         pool = HTTPConnectionPool(self.reactor)
-        pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5))
+        # XXX: The justification for using the cache factor here is that larger instances
+        # will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
         pool.cachedConnectionTimeout = 2 * 60
 
         self.agent = ProxyAgent(
diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py
index a248103191..ab7f948ed4 100644
--- a/synapse/metrics/_exposition.py
+++ b/synapse/metrics/_exposition.py
@@ -33,6 +33,8 @@ from prometheus_client import REGISTRY
 
 from twisted.web.resource import Resource
 
+from synapse.util import caches
+
 try:
     from prometheus_client.samples import Sample
 except ImportError:
@@ -103,13 +105,15 @@ def nameify_sample(sample):
 
 
 def generate_latest(registry, emit_help=False):
-    output = []
 
-    for metric in registry.collect():
+    # Trigger the cache metrics to be rescraped, which updates the common
+    # metrics but do not produce metrics themselves
+    for collector in caches.collectors_by_name.values():
+        collector.collect()
 
-        if metric.name.startswith("__unused"):
-            continue
+    output = []
 
+    for metric in registry.collect():
         if not metric.samples:
             # No samples, don't bother.
             continue
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 433ca2f416..e75d964ac8 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -51,6 +51,7 @@ push_rules_delta_state_cache_metric = register_cache(
     "cache",
     "push_rules_delta_state_cache_metric",
     cache=[],  # Meaningless size, as this isn't a cache that stores values
+    resizable=False,
 )
 
 
@@ -67,7 +68,8 @@ class BulkPushRuleEvaluator(object):
         self.room_push_rule_cache_metrics = register_cache(
             "cache",
             "room_push_rule_cache",
-            cache=[],  # Meaningless size, as this isn't a cache that stores values
+            cache=[],  # Meaningless size, as this isn't a cache that stores values,
+            resizable=False,
         )
 
     @defer.inlineCallbacks
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 4cd702b5fa..11032491af 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -22,7 +22,7 @@ from six import string_types
 
 from synapse.events import EventBase
 from synapse.types import UserID
-from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
+from synapse.util.caches import register_cache
 from synapse.util.caches.lrucache import LruCache
 
 logger = logging.getLogger(__name__)
@@ -165,7 +165,7 @@ class PushRuleEvaluatorForEvent(object):
 
 
 # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
-regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
+regex_cache = LruCache(50000)
 register_cache("cache", "regex_push_cache", regex_cache)
 
 
diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py
index fbf996e33a..1a38f53dfb 100644
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@@ -15,7 +15,6 @@
 
 from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY
 from synapse.storage.database import Database
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.caches.descriptors import Cache
 
 from ._base import BaseSlavedStore
@@ -26,7 +25,7 @@ class SlavedClientIpStore(BaseSlavedStore):
         super(SlavedClientIpStore, self).__init__(database, db_conn, hs)
 
         self.client_ip_last_seen = Cache(
-            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
+            name="client_ip_last_seen", keylen=4, max_entries=50000
         )
 
     def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 4afefc6b1d..2fa529fcd0 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -35,7 +35,6 @@ from synapse.state import v1, v2
 from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
 from synapse.types import StateMap
 from synapse.util.async_helpers import Linearizer
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.metrics import Measure, measure_func
 
@@ -53,7 +52,6 @@ state_groups_histogram = Histogram(
 KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
 
 
-SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache")
 EVICTION_TIMEOUT_SECONDS = 60 * 60
 
 
@@ -447,7 +445,7 @@ class StateResolutionHandler(object):
         self._state_cache = ExpiringCache(
             cache_name="state_cache",
             clock=self.clock,
-            max_len=SIZE_OF_CACHE,
+            max_len=100000,
             expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
             iterable=True,
             reset_expiry_on_get=True,
diff --git a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py
index 92bc06919b..71f8d43a76 100644
--- a/synapse/storage/data_stores/main/client_ips.py
+++ b/synapse/storage/data_stores/main/client_ips.py
@@ -22,7 +22,6 @@ from twisted.internet import defer
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import Database, make_tuple_comparison_clause
-from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.caches.descriptors import Cache
 
 logger = logging.getLogger(__name__)
@@ -361,7 +360,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore):
     def __init__(self, database: Database, db_conn, hs):
 
         self.client_ip_last_seen = Cache(
-            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
+            name="client_ip_last_seen", keylen=4, max_entries=50000
         )
 
         super(ClientIpStore, self).__init__(database, db_conn, hs)
diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py
index 73df6b33ba..b8c1bbdf99 100644
--- a/synapse/storage/data_stores/main/events_worker.py
+++ b/synapse/storage/data_stores/main/events_worker.py
@@ -75,7 +75,10 @@ class EventsWorkerStore(SQLBaseStore):
         super(EventsWorkerStore, self).__init__(database, db_conn, hs)
 
         self._get_event_cache = Cache(
-            "*getEvent*", keylen=3, max_entries=hs.config.event_cache_size
+            "*getEvent*",
+            keylen=3,
+            max_entries=hs.config.caches.event_cache_size,
+            apply_cache_factor_from_config=False,
         )
 
         self._event_fetch_lock = threading.Condition()
diff --git a/synapse/storage/data_stores/state/store.py b/synapse/storage/data_stores/state/store.py
index 57a5267663..f3ad1e4369 100644
--- a/synapse/storage/data_stores/state/store.py
+++ b/synapse/storage/data_stores/state/store.py
@@ -28,7 +28,6 @@ from synapse.storage.data_stores.state.bg_updates import StateBackgroundUpdateSt
 from synapse.storage.database import Database
 from synapse.storage.state import StateFilter
 from synapse.types import StateMap
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.dictionary_cache import DictionaryCache
 
@@ -90,11 +89,10 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
         self._state_group_cache = DictionaryCache(
             "*stateGroupCache*",
             # TODO: this hasn't been tuned yet
-            50000 * get_cache_factor_for("stateGroupCache"),
+            50000,
         )
         self._state_group_members_cache = DictionaryCache(
-            "*stateGroupMembersCache*",
-            500000 * get_cache_factor_for("stateGroupMembersCache"),
+            "*stateGroupMembersCache*", 500000,
         )
 
     @cached(max_entries=10000, iterable=True)
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index da5077b471..4b8a0c7a8f 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
+# Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,27 +15,17 @@
 # limitations under the License.
 
 import logging
-import os
-from typing import Dict
+from typing import Callable, Dict, Optional
 
 import six
 from six.moves import intern
 
-from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily
-
-logger = logging.getLogger(__name__)
-
-CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
+import attr
+from prometheus_client.core import Gauge
 
+from synapse.config.cache import add_resizable_cache
 
-def get_cache_factor_for(cache_name):
-    env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper()
-    factor = os.environ.get(env_var)
-    if factor:
-        return float(factor)
-
-    return CACHE_SIZE_FACTOR
-
+logger = logging.getLogger(__name__)
 
 caches_by_name = {}
 collectors_by_name = {}  # type: Dict
@@ -44,6 +34,7 @@ cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
 cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
 cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
+cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
 
 response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
 response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
@@ -53,67 +44,82 @@ response_cache_evicted = Gauge(
 response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])
 
 
-def register_cache(cache_type, cache_name, cache, collect_callback=None):
-    """Register a cache object for metric collection.
+@attr.s
+class CacheMetric(object):
+
+    _cache = attr.ib()
+    _cache_type = attr.ib(type=str)
+    _cache_name = attr.ib(type=str)
+    _collect_callback = attr.ib(type=Optional[Callable])
+
+    hits = attr.ib(default=0)
+    misses = attr.ib(default=0)
+    evicted_size = attr.ib(default=0)
+
+    def inc_hits(self):
+        self.hits += 1
+
+    def inc_misses(self):
+        self.misses += 1
+
+    def inc_evictions(self, size=1):
+        self.evicted_size += size
+
+    def describe(self):
+        return []
+
+    def collect(self):
+        try:
+            if self._cache_type == "response_cache":
+                response_cache_size.labels(self._cache_name).set(len(self._cache))
+                response_cache_hits.labels(self._cache_name).set(self.hits)
+                response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                response_cache_total.labels(self._cache_name).set(
+                    self.hits + self.misses
+                )
+            else:
+                cache_size.labels(self._cache_name).set(len(self._cache))
+                cache_hits.labels(self._cache_name).set(self.hits)
+                cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                cache_total.labels(self._cache_name).set(self.hits + self.misses)
+                if getattr(self._cache, "max_size", None):
+                    cache_max_size.labels(self._cache_name).set(self._cache.max_size)
+            if self._collect_callback:
+                self._collect_callback()
+        except Exception as e:
+            logger.warning("Error calculating metrics for %s: %s", self._cache_name, e)
+            raise
+
+
+def register_cache(
+    cache_type: str,
+    cache_name: str,
+    cache,
+    collect_callback: Optional[Callable] = None,
+    resizable: bool = True,
+    resize_callback: Optional[Callable] = None,
+) -> CacheMetric:
+    """Register a cache object for metric collection and resizing.
 
     Args:
-        cache_type (str):
-        cache_name (str): name of the cache
-        cache (object): cache itself
-        collect_callback (callable|None): if not None, a function which is called during
-            metric collection to update additional metrics.
+        cache_type
+        cache_name: name of the cache
+        cache: cache itself
+        collect_callback: If given, a function which is called during metric
+            collection to update additional metrics.
+        resizable: Whether this cache supports being resized.
+        resize_callback: A function which can be called to resize the cache.
 
     Returns:
         CacheMetric: an object which provides inc_{hits,misses,evictions} methods
     """
+    if resizable:
+        if not resize_callback:
+            resize_callback = getattr(cache, "set_cache_factor")
+        add_resizable_cache(cache_name, resize_callback)
 
-    # Check if the metric is already registered. Unregister it, if so.
-    # This usually happens during tests, as at runtime these caches are
-    # effectively singletons.
+    metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
     metric_name = "cache_%s_%s" % (cache_type, cache_name)
-    if metric_name in collectors_by_name.keys():
-        REGISTRY.unregister(collectors_by_name[metric_name])
-
-    class CacheMetric(object):
-
-        hits = 0
-        misses = 0
-        evicted_size = 0
-
-        def inc_hits(self):
-            self.hits += 1
-
-        def inc_misses(self):
-            self.misses += 1
-
-        def inc_evictions(self, size=1):
-            self.evicted_size += size
-
-        def describe(self):
-            return []
-
-        def collect(self):
-            try:
-                if cache_type == "response_cache":
-                    response_cache_size.labels(cache_name).set(len(cache))
-                    response_cache_hits.labels(cache_name).set(self.hits)
-                    response_cache_evicted.labels(cache_name).set(self.evicted_size)
-                    response_cache_total.labels(cache_name).set(self.hits + self.misses)
-                else:
-                    cache_size.labels(cache_name).set(len(cache))
-                    cache_hits.labels(cache_name).set(self.hits)
-                    cache_evicted.labels(cache_name).set(self.evicted_size)
-                    cache_total.labels(cache_name).set(self.hits + self.misses)
-                if collect_callback:
-                    collect_callback()
-            except Exception as e:
-                logger.warning("Error calculating metrics for %s: %s", cache_name, e)
-                raise
-
-            yield GaugeMetricFamily("__unused", "")
-
-    metric = CacheMetric()
-    REGISTRY.register(metric)
     caches_by_name[cache_name] = cache
     collectors_by_name[metric_name] = metric
     return metric
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 2e8f6543e5..cd48262420 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import functools
 import inspect
 import logging
@@ -30,7 +31,6 @@ from twisted.internet import defer
 from synapse.logging.context import make_deferred_yieldable, preserve_fn
 from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 
@@ -81,7 +81,6 @@ class CacheEntry(object):
 class Cache(object):
     __slots__ = (
         "cache",
-        "max_entries",
         "name",
         "keylen",
         "thread",
@@ -89,7 +88,29 @@ class Cache(object):
         "_pending_deferred_cache",
     )
 
-    def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False):
+    def __init__(
+        self,
+        name: str,
+        max_entries: int = 1000,
+        keylen: int = 1,
+        tree: bool = False,
+        iterable: bool = False,
+        apply_cache_factor_from_config: bool = True,
+    ):
+        """
+        Args:
+            name: The name of the cache
+            max_entries: Maximum amount of entries that the cache will hold
+            keylen: The length of the tuple used as the cache key
+            tree: Use a TreeCache instead of a dict as the underlying cache type
+            iterable: If True, count each item in the cached object as an entry,
+                rather than each cached object
+            apply_cache_factor_from_config: Whether cache factors specified in the
+                config file affect `max_entries`
+
+        Returns:
+            Cache
+        """
         cache_type = TreeCache if tree else dict
         self._pending_deferred_cache = cache_type()
 
@@ -99,6 +120,7 @@ class Cache(object):
             cache_type=cache_type,
             size_callback=(lambda d: len(d)) if iterable else None,
             evicted_callback=self._on_evicted,
+            apply_cache_factor_from_config=apply_cache_factor_from_config,
         )
 
         self.name = name
@@ -111,6 +133,10 @@ class Cache(object):
             collect_callback=self._metrics_collection_callback,
         )
 
+    @property
+    def max_entries(self):
+        return self.cache.max_size
+
     def _on_evicted(self, evicted_count):
         self.metrics.inc_evictions(evicted_count)
 
@@ -370,13 +396,11 @@ class CacheDescriptor(_CacheDescriptorBase):
             cache_context=cache_context,
         )
 
-        max_entries = int(max_entries * get_cache_factor_for(orig.__name__))
-
         self.max_entries = max_entries
         self.tree = tree
         self.iterable = iterable
 
-    def __get__(self, obj, objtype=None):
+    def __get__(self, obj, owner):
         cache = Cache(
             name=self.orig.__name__,
             max_entries=self.max_entries,
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index cddf1ed515..2726b67b6d 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -18,6 +18,7 @@ from collections import OrderedDict
 
 from six import iteritems, itervalues
 
+from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util.caches import register_cache
 
@@ -51,15 +52,16 @@ class ExpiringCache(object):
                 an item on access. Defaults to False.
             iterable (bool): If true, the size is calculated by summing the
                 sizes of all entries, rather than the number of entries.
-
         """
         self._cache_name = cache_name
 
+        self._original_max_size = max_len
+
+        self._max_size = int(max_len * cache_config.properties.default_factor_size)
+
         self._clock = clock
 
-        self._max_len = max_len
         self._expiry_ms = expiry_ms
-
         self._reset_expiry_on_get = reset_expiry_on_get
 
         self._cache = OrderedDict()
@@ -82,9 +84,11 @@ class ExpiringCache(object):
     def __setitem__(self, key, value):
         now = self._clock.time_msec()
         self._cache[key] = _CacheEntry(now, value)
+        self.evict()
 
+    def evict(self):
         # Evict if there are now too many items
-        while self._max_len and len(self) > self._max_len:
+        while self._max_size and len(self) > self._max_size:
             _key, value = self._cache.popitem(last=False)
             if self.iterable:
                 self.metrics.inc_evictions(len(value.value))
@@ -170,6 +174,23 @@ class ExpiringCache(object):
         else:
             return len(self._cache)
 
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = int(self._original_max_size * factor)
+        if new_size != self._max_size:
+            self._max_size = new_size
+            self.evict()
+            return True
+        return False
+
 
 class _CacheEntry(object):
     __slots__ = ["time", "value"]
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 1536cb64f3..29fabac3cd 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import threading
 from functools import wraps
+from typing import Callable, Optional, Type, Union
 
+from synapse.config import cache as cache_config
 from synapse.util.caches.treecache import TreeCache
 
 
@@ -52,17 +53,18 @@ class LruCache(object):
 
     def __init__(
         self,
-        max_size,
-        keylen=1,
-        cache_type=dict,
-        size_callback=None,
-        evicted_callback=None,
+        max_size: int,
+        keylen: int = 1,
+        cache_type: Type[Union[dict, TreeCache]] = dict,
+        size_callback: Optional[Callable] = None,
+        evicted_callback: Optional[Callable] = None,
+        apply_cache_factor_from_config: bool = True,
     ):
         """
         Args:
-            max_size (int):
+            max_size: The maximum amount of entries the cache can hold
 
-            keylen (int):
+            keylen: The length of the tuple used as the cache key
 
             cache_type (type):
                 type of underlying cache to be used. Typically one of dict
@@ -73,9 +75,23 @@ class LruCache(object):
             evicted_callback (func(int)|None):
                 if not None, called on eviction with the size of the evicted
                 entry
+
+            apply_cache_factor_from_config (bool): If true, `max_size` will be
+                multiplied by a cache factor derived from the homeserver config
         """
         cache = cache_type()
         self.cache = cache  # Used for introspection.
+
+        # Save the original max size, and apply the default size factor.
+        self._original_max_size = max_size
+        # We previously didn't apply the cache factor here, and as such some caches were
+        # not affected by the global cache factor. Add an option here to disable applying
+        # the cache factor when a cache is created
+        if apply_cache_factor_from_config:
+            self.max_size = int(max_size * cache_config.properties.default_factor_size)
+        else:
+            self.max_size = int(max_size)
+
         list_root = _Node(None, None, None, None)
         list_root.next_node = list_root
         list_root.prev_node = list_root
@@ -83,7 +99,7 @@ class LruCache(object):
         lock = threading.Lock()
 
         def evict():
-            while cache_len() > max_size:
+            while cache_len() > self.max_size:
                 todelete = list_root.prev_node
                 evicted_len = delete_node(todelete)
                 cache.pop(todelete.key, None)
@@ -236,6 +252,7 @@ class LruCache(object):
             return key in cache
 
         self.sentinel = object()
+        self._on_resize = evict
         self.get = cache_get
         self.set = cache_set
         self.setdefault = cache_set_default
@@ -266,3 +283,20 @@ class LruCache(object):
 
     def __contains__(self, key):
         return self.contains(key)
+
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = int(self._original_max_size * factor)
+        if new_size != self.max_size:
+            self.max_size = new_size
+            self._on_resize()
+            return True
+        return False
diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py
index b68f9fe0d4..a6c60888e5 100644
--- a/synapse/util/caches/response_cache.py
+++ b/synapse/util/caches/response_cache.py
@@ -38,7 +38,7 @@ class ResponseCache(object):
         self.timeout_sec = timeout_ms / 1000.0
 
         self._name = name
-        self._metrics = register_cache("response_cache", name, self)
+        self._metrics = register_cache("response_cache", name, self, resizable=False)
 
     def size(self):
         return len(self.pending_result_cache)
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index e54f80d76e..2a161bf244 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+import math
 from typing import Dict, FrozenSet, List, Mapping, Optional, Set, Union
 
 from six import integer_types
@@ -46,7 +47,8 @@ class StreamChangeCache:
         max_size=10000,
         prefilled_cache: Optional[Mapping[EntityType, int]] = None,
     ):
-        self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR)
+        self._original_max_size = max_size
+        self._max_size = math.floor(max_size)
         self._entity_to_key = {}  # type: Dict[EntityType, int]
 
         # map from stream id to the a set of entities which changed at that stream id.
@@ -58,12 +60,31 @@ class StreamChangeCache:
         #
         self._earliest_known_stream_pos = current_stream_pos
         self.name = name
-        self.metrics = caches.register_cache("cache", self.name, self._cache)
+        self.metrics = caches.register_cache(
+            "cache", self.name, self._cache, resize_callback=self.set_cache_factor
+        )
 
         if prefilled_cache:
             for entity, stream_pos in prefilled_cache.items():
                 self.entity_has_changed(entity, stream_pos)
 
+    def set_cache_factor(self, factor: float) -> bool:
+        """
+        Set the cache factor for this individual cache.
+
+        This will trigger a resize if it changes, which may require evicting
+        items from the cache.
+
+        Returns:
+            bool: Whether the cache changed size or not.
+        """
+        new_size = math.floor(self._original_max_size * factor)
+        if new_size != self._max_size:
+            self.max_size = new_size
+            self._evict()
+            return True
+        return False
+
     def has_entity_changed(self, entity: EntityType, stream_pos: int) -> bool:
         """Returns True if the entity may have been updated since stream_pos
         """
@@ -171,6 +192,7 @@ class StreamChangeCache:
             e1 = self._cache[stream_pos] = set()
         e1.add(entity)
         self._entity_to_key[entity] = stream_pos
+        self._evict()
 
         # if the cache is too big, remove entries
         while len(self._cache) > self._max_size:
@@ -179,6 +201,13 @@ class StreamChangeCache:
             for entity in r:
                 del self._entity_to_key[entity]
 
+    def _evict(self):
+        while len(self._cache) > self._max_size:
+            k, r = self._cache.popitem(0)
+            self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos)
+            for entity in r:
+                self._entity_to_key.pop(entity, None)
+
     def get_max_pos_of_last_change(self, entity: EntityType) -> int:
 
         """Returns an upper bound of the stream id of the last change to an
diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py
index 99646c7cf0..6437aa907e 100644
--- a/synapse/util/caches/ttlcache.py
+++ b/synapse/util/caches/ttlcache.py
@@ -38,7 +38,7 @@ class TTLCache(object):
 
         self._timer = timer
 
-        self._metrics = register_cache("ttl", cache_name, self)
+        self._metrics = register_cache("ttl", cache_name, self, resizable=False)
 
     def set(self, key, value, ttl):
         """Add/update an entry in the cache
diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py
new file mode 100644
index 0000000000..2920279125
--- /dev/null
+++ b/tests/config/test_cache.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.config._base import Config, RootConfig
+from synapse.config.cache import CacheConfig, add_resizable_cache
+from synapse.util.caches.lrucache import LruCache
+
+from tests.unittest import TestCase
+
+
+class FakeServer(Config):
+    section = "server"
+
+
+class TestConfig(RootConfig):
+    config_classes = [FakeServer, CacheConfig]
+
+
+class CacheConfigTests(TestCase):
+    def setUp(self):
+        # Reset caches before each test
+        TestConfig().caches.reset()
+
+    def test_individual_caches_from_environ(self):
+        """
+        Individual cache factors will be loaded from the environment.
+        """
+        config = {}
+        t = TestConfig()
+        t.caches._environ = {
+            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
+            "SYNAPSE_NOT_CACHE": "BLAH",
+        }
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(dict(t.caches.cache_factors), {"something_or_other": 2.0})
+
+    def test_config_overrides_environ(self):
+        """
+        Individual cache factors defined in the environment will take precedence
+        over those in the config.
+        """
+        config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}}
+        t = TestConfig()
+        t.caches._environ = {
+            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
+            "SYNAPSE_CACHE_FACTOR_FOO": 1,
+        }
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(
+            dict(t.caches.cache_factors),
+            {"foo": 1.0, "bar": 3.0, "something_or_other": 2.0},
+        )
+
+    def test_individual_instantiated_before_config_load(self):
+        """
+        If a cache is instantiated before the config is read, it will be given
+        the default cache size in the interim, and then resized once the config
+        is loaded.
+        """
+        cache = LruCache(100)
+
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 50)
+
+        config = {"caches": {"per_cache_factors": {"foo": 3}}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(cache.max_size, 300)
+
+    def test_individual_instantiated_after_config_load(self):
+        """
+        If a cache is instantiated after the config is read, it will be
+        immediately resized to the correct size given the per_cache_factor if
+        there is one.
+        """
+        config = {"caches": {"per_cache_factors": {"foo": 2}}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 200)
+
+    def test_global_instantiated_before_config_load(self):
+        """
+        If a cache is instantiated before the config is read, it will be given
+        the default cache size in the interim, and then resized to the new
+        default cache size once the config is loaded.
+        """
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 50)
+
+        config = {"caches": {"global_factor": 4}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        self.assertEqual(cache.max_size, 400)
+
+    def test_global_instantiated_after_config_load(self):
+        """
+        If a cache is instantiated after the config is read, it will be
+        immediately resized to the correct size given the global factor if there
+        is no per-cache factor.
+        """
+        config = {"caches": {"global_factor": 1.5}}
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        cache = LruCache(100)
+        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
+        self.assertEqual(cache.max_size, 150)
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index e37260a820..5a50e4fdd4 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -25,8 +25,8 @@ from synapse.util.caches.descriptors import Cache, cached
 from tests import unittest
 
 
-class CacheTestCase(unittest.TestCase):
-    def setUp(self):
+class CacheTestCase(unittest.HomeserverTestCase):
+    def prepare(self, reactor, clock, homeserver):
         self.cache = Cache("test")
 
     def test_empty(self):
@@ -96,7 +96,7 @@ class CacheTestCase(unittest.TestCase):
         cache.get(3)
 
 
-class CacheDecoratorTestCase(unittest.TestCase):
+class CacheDecoratorTestCase(unittest.HomeserverTestCase):
     @defer.inlineCallbacks
     def test_passthrough(self):
         class A(object):
@@ -239,7 +239,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
         callcount2 = [0]
 
         class A(object):
-            @cached(max_entries=4)  # HACK: This makes it 2 due to cache factor
+            @cached(max_entries=2)
             def func(self, key):
                 callcount[0] += 1
                 return key
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 31710949a8..ef296e7dab 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -43,7 +43,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = self.as_yaml_files
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         self.as_token = "token1"
@@ -110,7 +110,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = self.as_yaml_files
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         self.as_list = [
@@ -422,7 +422,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         database = hs.get_datastores().databases[0]
@@ -440,7 +440,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         with self.assertRaises(ConfigError) as cm:
@@ -464,7 +464,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
         )
 
         hs.config.app_service_config_files = [f1, f2]
-        hs.config.event_cache_size = 1
+        hs.config.caches.event_cache_size = 1
         hs.config.password_providers = []
 
         with self.assertRaises(ConfigError) as cm:
diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py
index cdee0a9e60..278961c331 100644
--- a/tests/storage/test_base.py
+++ b/tests/storage/test_base.py
@@ -51,7 +51,8 @@ class SQLBaseStoreTestCase(unittest.TestCase):
 
         config = Mock()
         config._disable_native_upserts = True
-        config.event_cache_size = 1
+        config.caches = Mock()
+        config.caches.event_cache_size = 1
         hs = TestHomeServer("test", config=config)
 
         sqlite_config = {"name": "sqlite3"}
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 270f853d60..f5f63d8ed6 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
+from synapse.util.caches.descriptors import Cache
 
 from tests import unittest
 
@@ -129,3 +130,36 @@ class BuildInfoTests(unittest.TestCase):
         self.assertTrue(b"osversion=" in items[0])
         self.assertTrue(b"pythonversion=" in items[0])
         self.assertTrue(b"version=" in items[0])
+
+
+class CacheMetricsTests(unittest.HomeserverTestCase):
+    def test_cache_metric(self):
+        """
+        Caches produce metrics reflecting their state when scraped.
+        """
+        CACHE_NAME = "cache_metrics_test_fgjkbdfg"
+        cache = Cache(CACHE_NAME, max_entries=777)
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "0.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
+
+        cache.prefill("1", "hi")
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "1.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
diff --git a/tests/util/test_expiring_cache.py b/tests/util/test_expiring_cache.py
index 50bc7702d2..49ffeebd0e 100644
--- a/tests/util/test_expiring_cache.py
+++ b/tests/util/test_expiring_cache.py
@@ -21,7 +21,7 @@ from tests.utils import MockClock
 from .. import unittest
 
 
-class ExpiringCacheTestCase(unittest.TestCase):
+class ExpiringCacheTestCase(unittest.HomeserverTestCase):
     def test_get_set(self):
         clock = MockClock()
         cache = ExpiringCache("test", clock, max_len=1)
diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py
index 786947375d..0adb2174af 100644
--- a/tests/util/test_lrucache.py
+++ b/tests/util/test_lrucache.py
@@ -22,7 +22,7 @@ from synapse.util.caches.treecache import TreeCache
 from .. import unittest
 
 
-class LruCacheTestCase(unittest.TestCase):
+class LruCacheTestCase(unittest.HomeserverTestCase):
     def test_get_set(self):
         cache = LruCache(1)
         cache["key"] = "value"
@@ -84,7 +84,7 @@ class LruCacheTestCase(unittest.TestCase):
         self.assertEquals(len(cache), 0)
 
 
-class LruCacheCallbacksTestCase(unittest.TestCase):
+class LruCacheCallbacksTestCase(unittest.HomeserverTestCase):
     def test_get(self):
         m = Mock()
         cache = LruCache(1)
@@ -233,7 +233,7 @@ class LruCacheCallbacksTestCase(unittest.TestCase):
         self.assertEquals(m3.call_count, 1)
 
 
-class LruCacheSizedTestCase(unittest.TestCase):
+class LruCacheSizedTestCase(unittest.HomeserverTestCase):
     def test_evict(self):
         cache = LruCache(5, size_callback=len)
         cache["key1"] = [0]
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index 6857933540..13b753e367 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -1,11 +1,9 @@
-from mock import patch
-
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 from tests import unittest
 
 
-class StreamChangeCacheTests(unittest.TestCase):
+class StreamChangeCacheTests(unittest.HomeserverTestCase):
     """
     Tests for StreamChangeCache.
     """
@@ -54,7 +52,6 @@ class StreamChangeCacheTests(unittest.TestCase):
         self.assertTrue(cache.has_entity_changed("user@foo.com", 0))
         self.assertTrue(cache.has_entity_changed("not@here.website", 0))
 
-    @patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0)
     def test_entity_has_changed_pops_off_start(self):
         """
         StreamChangeCache.entity_has_changed will respect the max size and
diff --git a/tests/utils.py b/tests/utils.py
index f9be62b499..59c020a051 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -167,6 +167,7 @@ def default_config(name, parse=False):
         # disable user directory updates, because they get done in the
         # background, which upsets the test runner.
         "update_user_directory": False,
+        "caches": {"global_factor": 1},
     }
 
     if parse:
-- 
cgit 1.5.1


From 1a1da60ad2c9172fe487cd38a164b39df60f4cb5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 12 May 2020 11:20:48 +0100
Subject: Fix new flake8 errors (#7470)

---
 changelog.d/7470.misc       |  1 +
 synapse/app/_base.py        |  5 +++--
 synapse/config/server.py    |  2 +-
 synapse/notifier.py         | 10 ++++++----
 synapse/push/mailer.py      |  7 +++++--
 synapse/storage/database.py |  4 ++--
 tests/config/test_load.py   |  2 +-
 7 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/7470.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7470.misc b/changelog.d/7470.misc
new file mode 100644
index 0000000000..45e66ecf48
--- /dev/null
+++ b/changelog.d/7470.misc
@@ -0,0 +1 @@
+Fix linting errors in new version of Flake8.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 628292b890..dedff81af3 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -22,6 +22,7 @@ import sys
 import traceback
 
 from daemonize import Daemonize
+from typing_extensions import NoReturn
 
 from twisted.internet import defer, error, reactor
 from twisted.protocols.tls import TLSMemoryBIOFactory
@@ -139,9 +140,9 @@ def start_reactor(
             run()
 
 
-def quit_with_error(error_string):
+def quit_with_error(error_string: str) -> NoReturn:
     message_lines = error_string.split("\n")
-    line_length = max(len(l) for l in message_lines if len(l) < 80) + 2
+    line_length = max(len(line) for line in message_lines if len(line) < 80) + 2
     sys.stderr.write("*" * line_length + "\n")
     for line in message_lines:
         sys.stderr.write(" %s\n" % (line.rstrip(),))
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 6d88231843..ed28da3deb 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -522,7 +522,7 @@ class ServerConfig(Config):
         )
 
     def has_tls_listener(self) -> bool:
-        return any(l["tls"] for l in self.listeners)
+        return any(listener["tls"] for listener in self.listeners)
 
     def generate_config_section(
         self, server_name, data_dir_path, open_private_ports, listeners, **kwargs
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 71d9ed62b0..87c120a59c 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -15,7 +15,7 @@
 
 import logging
 from collections import namedtuple
-from typing import Callable, List
+from typing import Callable, Iterable, List, TypeVar
 
 from prometheus_client import Counter
 
@@ -42,12 +42,14 @@ users_woken_by_stream_counter = Counter(
     "synapse_notifier_users_woken_by_stream", "", ["stream"]
 )
 
+T = TypeVar("T")
+
 
 # TODO(paul): Should be shared somewhere
-def count(func, l):
-    """Return the number of items in l for which func returns true."""
+def count(func: Callable[[T], bool], it: Iterable[T]) -> int:
+    """Return the number of items in it for which func returns true."""
     n = 0
-    for x in l:
+    for x in it:
         if func(x):
             n += 1
     return n
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 73580c1c6c..ab33abbeed 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -19,6 +19,7 @@ import logging
 import time
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
+from typing import Iterable, List, TypeVar
 
 from six.moves import urllib
 
@@ -41,6 +42,8 @@ from synapse.visibility import filter_events_for_client
 
 logger = logging.getLogger(__name__)
 
+T = TypeVar("T")
+
 
 MESSAGE_FROM_PERSON_IN_ROOM = (
     "You have a message on %(app)s from %(person)s in the %(room)s room..."
@@ -638,10 +641,10 @@ def safe_text(raw_text):
     )
 
 
-def deduped_ordered_list(l):
+def deduped_ordered_list(it: Iterable[T]) -> List[T]:
     seen = set()
     ret = []
-    for item in l:
+    for item in it:
         if item not in seen:
             seen.add(item)
             ret.append(item)
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 2b635d6ca0..c3d0863429 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -214,9 +214,9 @@ class LoggingTransaction:
     def executemany(self, sql: str, *args: Any):
         self._do_execute(self.txn.executemany, sql, *args)
 
-    def _make_sql_one_line(self, sql):
+    def _make_sql_one_line(self, sql: str) -> str:
         "Strip newlines out of SQL so that the loggers in the DB are on one line"
-        return " ".join(l.strip() for l in sql.splitlines() if l.strip())
+        return " ".join(line.strip() for line in sql.splitlines() if line.strip())
 
     def _do_execute(self, func, sql, *args):
         sql = self._make_sql_one_line(sql)
diff --git a/tests/config/test_load.py b/tests/config/test_load.py
index b3e557bd6a..734a9983e8 100644
--- a/tests/config/test_load.py
+++ b/tests/config/test_load.py
@@ -122,7 +122,7 @@ class ConfigLoadingTestCase(unittest.TestCase):
 
         with open(self.file, "r") as f:
             contents = f.readlines()
-        contents = [l for l in contents if needle not in l]
+        contents = [line for line in contents if needle not in line]
         with open(self.file, "w") as f:
             f.write("".join(contents))
 
-- 
cgit 1.5.1


From 1124111a12c3ab35f8b68d9031695aec8b2c7c50 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 13 May 2020 17:15:40 +0100
Subject: Allow censoring of events to happen on workers. (#7492)

This is safe as we can now write to cache invalidation stream on workers, and is required for when we move event persistence off master.
---
 changelog.d/7492.misc                             | 1 +
 synapse/app/generic_worker.py                     | 2 ++
 synapse/handlers/message.py                       | 2 --
 synapse/storage/data_stores/main/censor_events.py | 7 +------
 4 files changed, 4 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/7492.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7492.misc b/changelog.d/7492.misc
new file mode 100644
index 0000000000..5ad31819d6
--- /dev/null
+++ b/changelog.d/7492.misc
@@ -0,0 +1 @@
+Allow censoring of events to happen on workers.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 667ad20428..bccb1140b2 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -122,6 +122,7 @@ from synapse.rest.client.v2_alpha.register import RegisterRestServlet
 from synapse.rest.client.versions import VersionsRestServlet
 from synapse.rest.key.v2 import KeyApiV2Resource
 from synapse.server import HomeServer
+from synapse.storage.data_stores.main.censor_events import CensorEventsStore
 from synapse.storage.data_stores.main.media_repository import MediaRepositoryStore
 from synapse.storage.data_stores.main.monthly_active_users import (
     MonthlyActiveUsersWorkerStore,
@@ -442,6 +443,7 @@ class GenericWorkerSlavedStore(
     SlavedGroupServerStore,
     SlavedAccountDataStore,
     SlavedPusherStore,
+    CensorEventsStore,
     SlavedEventStore,
     SlavedKeyStore,
     RoomStore,
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index a622a600b4..0242521cc6 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -72,7 +72,6 @@ class MessageHandler(object):
         self.state_store = self.storage.state
         self._event_serializer = hs.get_event_client_serializer()
         self._ephemeral_events_enabled = hs.config.enable_ephemeral_messages
-        self._is_worker_app = bool(hs.config.worker_app)
 
         # The scheduled call to self._expire_event. None if no call is currently
         # scheduled.
@@ -260,7 +259,6 @@ class MessageHandler(object):
         Args:
             event (EventBase): The event to schedule the expiry of.
         """
-        assert not self._is_worker_app
 
         expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
         if not isinstance(expiry_ts, int) or event.is_state():
diff --git a/synapse/storage/data_stores/main/censor_events.py b/synapse/storage/data_stores/main/censor_events.py
index 49683b4d5a..2d48261724 100644
--- a/synapse/storage/data_stores/main/censor_events.py
+++ b/synapse/storage/data_stores/main/censor_events.py
@@ -33,15 +33,10 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class CensorEventsStore(CacheInvalidationWorkerStore, EventsWorkerStore, SQLBaseStore):
+class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBaseStore):
     def __init__(self, database: Database, db_conn, hs: "HomeServer"):
         super().__init__(database, db_conn, hs)
 
-        # This should only exist on master for now
-        assert (
-            hs.config.worker.worker_app is None
-        ), "Can only instantiate CensorEventsStore on master"
-
         def _censor_redactions():
             return run_as_background_process(
                 "_censor_redactions", self._censor_redactions
-- 
cgit 1.5.1


From 4734a7bbe4d08d68c5f04dd76cd5bcfb4cd9b6be Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 14 May 2020 14:01:39 +0100
Subject: Move EventStream handling into default ReplicationDataHandler (#7493)

This is so that the logic can happen on both master and workers when we move event persistence out.
---
 changelog.d/7493.misc             |  1 +
 synapse/app/generic_worker.py     | 33 ++-------------------------------
 synapse/replication/tcp/client.py | 37 +++++++++++++++++++++++++++++++++----
 synapse/server.py                 |  2 +-
 synapse/server.pyi                |  3 +++
 5 files changed, 40 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/7493.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7493.misc b/changelog.d/7493.misc
new file mode 100644
index 0000000000..575c55a99b
--- /dev/null
+++ b/changelog.d/7493.misc
@@ -0,0 +1 @@
+Move EventStream handling into default ReplicationDataHandler.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index bccb1140b2..2e3add7ac5 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -26,7 +26,6 @@ from twisted.web.resource import NoResource
 
 import synapse
 import synapse.events
-from synapse.api.constants import EventTypes
 from synapse.api.errors import HttpResponseException, SynapseError
 from synapse.api.urls import (
     CLIENT_API_PREFIX,
@@ -81,11 +80,6 @@ from synapse.replication.tcp.streams import (
     ToDeviceStream,
     TypingStream,
 )
-from synapse.replication.tcp.streams.events import (
-    EventsStream,
-    EventsStreamEventRow,
-    EventsStreamRow,
-)
 from synapse.rest.admin import register_servlets_for_media_repo
 from synapse.rest.client.v1 import events
 from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
@@ -633,7 +627,7 @@ class GenericWorkerServer(HomeServer):
 
 class GenericWorkerReplicationHandler(ReplicationDataHandler):
     def __init__(self, hs):
-        super(GenericWorkerReplicationHandler, self).__init__(hs.get_datastore())
+        super(GenericWorkerReplicationHandler, self).__init__(hs)
 
         self.store = hs.get_datastore()
         self.typing_handler = hs.get_typing_handler()
@@ -659,30 +653,7 @@ class GenericWorkerReplicationHandler(ReplicationDataHandler):
                     stream_name, token, rows
                 )
 
-            if stream_name == EventsStream.NAME:
-                # We shouldn't get multiple rows per token for events stream, so
-                # we don't need to optimise this for multiple rows.
-                for row in rows:
-                    if row.type != EventsStreamEventRow.TypeId:
-                        continue
-                    assert isinstance(row, EventsStreamRow)
-
-                    event = await self.store.get_event(
-                        row.data.event_id, allow_rejected=True
-                    )
-                    if event.rejected_reason:
-                        continue
-
-                    extra_users = ()
-                    if event.type == EventTypes.Member:
-                        extra_users = (event.state_key,)
-                    max_token = self.store.get_room_max_stream_ordering()
-                    self.notifier.on_new_room_event(
-                        event, token, max_token, extra_users
-                    )
-
-                await self.pusher_pool.on_new_notifications(token, token)
-            elif stream_name == PushRulesStream.NAME:
+            if stream_name == PushRulesStream.NAME:
                 self.notifier.on_new_event(
                     "push_rules_key", token, users=[row.user_id for row in rows]
                 )
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 20cb8a654f..28826302f5 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -16,12 +16,17 @@
 """
 
 import logging
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Tuple
 
 from twisted.internet.protocol import ReconnectingClientFactory
 
-from synapse.replication.slave.storage._base import BaseSlavedStore
+from synapse.api.constants import EventTypes
 from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
+from synapse.replication.tcp.streams.events import (
+    EventsStream,
+    EventsStreamEventRow,
+    EventsStreamRow,
+)
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -83,8 +88,10 @@ class ReplicationDataHandler:
     to handle updates in additional ways.
     """
 
-    def __init__(self, store: BaseSlavedStore):
-        self.store = store
+    def __init__(self, hs: "HomeServer"):
+        self.store = hs.get_datastore()
+        self.pusher_pool = hs.get_pusherpool()
+        self.notifier = hs.get_notifier()
 
     async def on_rdata(
         self, stream_name: str, instance_name: str, token: int, rows: list
@@ -102,6 +109,28 @@ class ReplicationDataHandler:
         """
         self.store.process_replication_rows(stream_name, instance_name, token, rows)
 
+        if stream_name == EventsStream.NAME:
+            # We shouldn't get multiple rows per token for events stream, so
+            # we don't need to optimise this for multiple rows.
+            for row in rows:
+                if row.type != EventsStreamEventRow.TypeId:
+                    continue
+                assert isinstance(row, EventsStreamRow)
+
+                event = await self.store.get_event(
+                    row.data.event_id, allow_rejected=True
+                )
+                if event.rejected_reason:
+                    continue
+
+                extra_users = ()  # type: Tuple[str, ...]
+                if event.type == EventTypes.Member:
+                    extra_users = (event.state_key,)
+                max_token = self.store.get_room_max_stream_ordering()
+                self.notifier.on_new_room_event(event, token, max_token, extra_users)
+
+            await self.pusher_pool.on_new_notifications(token, token)
+
     async def on_position(self, stream_name: str, instance_name: str, token: int):
         self.store.process_replication_rows(stream_name, instance_name, token, [])
 
diff --git a/synapse/server.py b/synapse/server.py
index b4aea81e24..c530f1aa1a 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -581,7 +581,7 @@ class HomeServer(object):
         return ReplicationStreamer(self)
 
     def build_replication_data_handler(self):
-        return ReplicationDataHandler(self.get_datastore())
+        return ReplicationDataHandler(self)
 
     def remove_pusher(self, app_id, push_key, user_id):
         return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)
diff --git a/synapse/server.pyi b/synapse/server.pyi
index 31a9cc0389..9e7fad7e6e 100644
--- a/synapse/server.pyi
+++ b/synapse/server.pyi
@@ -19,6 +19,7 @@ import synapse.handlers.room_member
 import synapse.handlers.set_password
 import synapse.http.client
 import synapse.notifier
+import synapse.push.pusherpool
 import synapse.replication.tcp.client
 import synapse.replication.tcp.handler
 import synapse.rest.media.v1.media_repository
@@ -133,3 +134,5 @@ class HomeServer(object):
         pass
     def get_macaroon_generator(self) -> synapse.handlers.auth.MacaroonGenerator:
         pass
+    def get_pusherpool(self) -> synapse.push.pusherpool.PusherPool:
+        pass
-- 
cgit 1.5.1


From 03aff4c75ed3b0b106ed1395b3d03b1ab9b013a6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 15 May 2020 17:22:47 +0100
Subject: Add a worker store for search insertion. (#7516)

This is required as both event persistence and the background update needs access to this function. It should be perfectly safe for two workers to write to that table at the same time.
---
 changelog.d/7516.misc                      |  1 +
 synapse/app/generic_worker.py              |  2 +
 synapse/storage/data_stores/main/search.py | 96 +++++++++++++++---------------
 3 files changed, 52 insertions(+), 47 deletions(-)
 create mode 100644 changelog.d/7516.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7516.misc b/changelog.d/7516.misc
new file mode 100644
index 0000000000..94b0fd49b2
--- /dev/null
+++ b/changelog.d/7516.misc
@@ -0,0 +1 @@
+Add a worker store for search insertion, required for moving event persistence off master.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 2e3add7ac5..ab801108ca 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -122,6 +122,7 @@ from synapse.storage.data_stores.main.monthly_active_users import (
     MonthlyActiveUsersWorkerStore,
 )
 from synapse.storage.data_stores.main.presence import UserPresenceState
+from synapse.storage.data_stores.main.search import SearchWorkerStore
 from synapse.storage.data_stores.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.data_stores.main.user_directory import UserDirectoryStore
 from synapse.types import ReadReceipt
@@ -451,6 +452,7 @@ class GenericWorkerSlavedStore(
     SlavedFilteringStore,
     MonthlyActiveUsersWorkerStore,
     MediaRepositoryStore,
+    SearchWorkerStore,
     BaseSlavedStore,
 ):
     def __init__(self, database, db_conn, hs):
diff --git a/synapse/storage/data_stores/main/search.py b/synapse/storage/data_stores/main/search.py
index ee75b92344..13f49d8060 100644
--- a/synapse/storage/data_stores/main/search.py
+++ b/synapse/storage/data_stores/main/search.py
@@ -37,7 +37,55 @@ SearchEntry = namedtuple(
 )
 
 
-class SearchBackgroundUpdateStore(SQLBaseStore):
+class SearchWorkerStore(SQLBaseStore):
+    def store_search_entries_txn(self, txn, entries):
+        """Add entries to the search table
+
+        Args:
+            txn (cursor):
+            entries (iterable[SearchEntry]):
+                entries to be added to the table
+        """
+        if not self.hs.config.enable_search:
+            return
+        if isinstance(self.database_engine, PostgresEngine):
+            sql = (
+                "INSERT INTO event_search"
+                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
+                " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
+            )
+
+            args = (
+                (
+                    entry.event_id,
+                    entry.room_id,
+                    entry.key,
+                    entry.value,
+                    entry.stream_ordering,
+                    entry.origin_server_ts,
+                )
+                for entry in entries
+            )
+
+            txn.executemany(sql, args)
+
+        elif isinstance(self.database_engine, Sqlite3Engine):
+            sql = (
+                "INSERT INTO event_search (event_id, room_id, key, value)"
+                " VALUES (?,?,?,?)"
+            )
+            args = (
+                (entry.event_id, entry.room_id, entry.key, entry.value)
+                for entry in entries
+            )
+
+            txn.executemany(sql, args)
+        else:
+            # This should be unreachable.
+            raise Exception("Unrecognized database engine")
+
+
+class SearchBackgroundUpdateStore(SearchWorkerStore):
 
     EVENT_SEARCH_UPDATE_NAME = "event_search"
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
@@ -296,52 +344,6 @@ class SearchBackgroundUpdateStore(SQLBaseStore):
 
         return num_rows
 
-    def store_search_entries_txn(self, txn, entries):
-        """Add entries to the search table
-
-        Args:
-            txn (cursor):
-            entries (iterable[SearchEntry]):
-                entries to be added to the table
-        """
-        if not self.hs.config.enable_search:
-            return
-        if isinstance(self.database_engine, PostgresEngine):
-            sql = (
-                "INSERT INTO event_search"
-                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
-                " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
-            )
-
-            args = (
-                (
-                    entry.event_id,
-                    entry.room_id,
-                    entry.key,
-                    entry.value,
-                    entry.stream_ordering,
-                    entry.origin_server_ts,
-                )
-                for entry in entries
-            )
-
-            txn.executemany(sql, args)
-
-        elif isinstance(self.database_engine, Sqlite3Engine):
-            sql = (
-                "INSERT INTO event_search (event_id, room_id, key, value)"
-                " VALUES (?,?,?,?)"
-            )
-            args = (
-                (entry.event_id, entry.room_id, entry.key, entry.value)
-                for entry in entries
-            )
-
-            txn.executemany(sql, args)
-        else:
-            # This should be unreachable.
-            raise Exception("Unrecognized database engine")
-
 
 class SearchStore(SearchBackgroundUpdateStore):
     def __init__(self, database: Database, db_conn, hs):
-- 
cgit 1.5.1


From 51055c8c4409e70e8f310fce420b2f2f7f7a257a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 18 May 2020 12:24:48 +0100
Subject: Allow ReplicationRestResource to be added to workers (#7515)

This allows workers to talk to each other over HTTP replication.
---
 changelog.d/7515.misc                |  1 +
 synapse/app/generic_worker.py        |  4 ++++
 synapse/replication/http/__init__.py | 13 ++++++++-----
 3 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/7515.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7515.misc b/changelog.d/7515.misc
new file mode 100644
index 0000000000..48f3044f90
--- /dev/null
+++ b/changelog.d/7515.misc
@@ -0,0 +1 @@
+Allow `ReplicationRestResource` to be added to workers.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index ab801108ca..506b70443b 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -47,6 +47,7 @@ from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@@ -570,6 +571,9 @@ class GenericWorkerServer(HomeServer):
                 if name in ["keys", "federation"]:
                     resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
 
+                if name == "replication":
+                    resources[REPLICATION_PREFIX] = ReplicationRestResource(self)
+
         root_resource = create_resource_tree(resources, NoResource())
 
         _base.listen_tcp(
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 4613b2538c..a909744e93 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -34,9 +34,12 @@ class ReplicationRestResource(JsonResource):
 
     def register_servlets(self, hs):
         send_event.register_servlets(hs, self)
-        membership.register_servlets(hs, self)
         federation.register_servlets(hs, self)
-        login.register_servlets(hs, self)
-        register.register_servlets(hs, self)
-        devices.register_servlets(hs, self)
-        streams.register_servlets(hs, self)
+
+        # The following can't currently be instantiated on workers.
+        if hs.config.worker.worker_app is None:
+            membership.register_servlets(hs, self)
+            login.register_servlets(hs, self)
+            register.register_servlets(hs, self)
+            devices.register_servlets(hs, self)
+            streams.register_servlets(hs, self)
-- 
cgit 1.5.1


From 0bbbd10513008d30c17eb1d1e7ba1d091fb44ec7 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 21 May 2020 14:36:46 +0100
Subject: Stub out GET presence requests in the frontend proxy (#7545)

We don't really make any promises about returning accurate presence data when
presence is disabled, so we may as well just return a static response, rather
than making the master handle a request.
---
 changelog.d/7545.misc         |  1 +
 synapse/app/generic_worker.py | 21 ++++-----------------
 2 files changed, 5 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/7545.misc

(limited to 'synapse/app')

diff --git a/changelog.d/7545.misc b/changelog.d/7545.misc
new file mode 100644
index 0000000000..177ec883e2
--- /dev/null
+++ b/changelog.d/7545.misc
@@ -0,0 +1 @@
+Make worker processes return a stubbed-out response to `GET /presence` requests.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 506b70443b..d751c9772b 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -26,7 +26,7 @@ from twisted.web.resource import NoResource
 
 import synapse
 import synapse.events
-from synapse.api.errors import HttpResponseException, SynapseError
+from synapse.api.errors import SynapseError
 from synapse.api.urls import (
     CLIENT_API_PREFIX,
     FEDERATION_PREFIX,
@@ -137,31 +137,18 @@ logger = logging.getLogger("synapse.app.generic_worker")
 
 class PresenceStatusStubServlet(RestServlet):
     """If presence is disabled this servlet can be used to stub out setting
-    presence status, while proxying the getters to the master instance.
+    presence status.
     """
 
     PATTERNS = client_patterns("/presence/(?P<user_id>[^/]*)/status")
 
     def __init__(self, hs):
         super(PresenceStatusStubServlet, self).__init__()
-        self.http_client = hs.get_simple_http_client()
         self.auth = hs.get_auth()
-        self.main_uri = hs.config.worker_main_http_uri
 
     async def on_GET(self, request, user_id):
-        # Pass through the auth headers, if any, in case the access token
-        # is there.
-        auth_headers = request.requestHeaders.getRawHeaders("Authorization", [])
-        headers = {"Authorization": auth_headers}
-
-        try:
-            result = await self.http_client.get_json(
-                self.main_uri + request.uri.decode("ascii"), headers=headers
-            )
-        except HttpResponseException as e:
-            raise e.to_synapse_error()
-
-        return 200, result
+        await self.auth.get_user_by_req(request)
+        return 200, {"presence": "offline"}
 
     async def on_PUT(self, request, user_id):
         await self.auth.get_user_by_req(request)
-- 
cgit 1.5.1