From 7a5873277ef456e8446a05468ccae2d81e363977 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Jul 2021 16:32:12 +0100 Subject: Add support for evicting cache entries based on last access time. (#10205) --- changelog.d/10205.feature | 1 + docs/sample_config.yaml | 62 ++++++----- mypy.ini | 1 + synapse/app/_base.py | 11 +- synapse/config/_base.pyi | 2 + synapse/config/cache.py | 70 +++++++----- synapse/util/caches/lrucache.py | 237 ++++++++++++++++++++++++++++++++++------ synapse/util/linked_list.py | 150 +++++++++++++++++++++++++ tests/util/test_lrucache.py | 46 +++++++- 9 files changed, 485 insertions(+), 95 deletions(-) create mode 100644 changelog.d/10205.feature create mode 100644 synapse/util/linked_list.py diff --git a/changelog.d/10205.feature b/changelog.d/10205.feature new file mode 100644 index 0000000000..db3fd22587 --- /dev/null +++ b/changelog.d/10205.feature @@ -0,0 +1 @@ +Add support for evicting cache entries based on last access time. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 6fcc022b47..c04aca1f42 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -673,35 +673,41 @@ retention: #event_cache_size: 10K caches: - # Controls the global cache factor, which is the default cache factor - # for all caches if a specific factor for that cache is not otherwise - # set. - # - # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment - # variable. Setting by environment variable takes priority over - # setting through the config file. - # - # Defaults to 0.5, which will half the size of all caches. - # - #global_factor: 1.0 + # Controls the global cache factor, which is the default cache factor + # for all caches if a specific factor for that cache is not otherwise + # set. + # + # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment + # variable. Setting by environment variable takes priority over + # setting through the config file. + # + # Defaults to 0.5, which will half the size of all caches. + # + #global_factor: 1.0 - # A dictionary of cache name to cache factor for that individual - # cache. Overrides the global cache factor for a given cache. - # - # These can also be set through environment variables comprised - # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital - # letters and underscores. Setting by environment variable - # takes priority over setting through the config file. - # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0 - # - # Some caches have '*' and other characters that are not - # alphanumeric or underscores. These caches can be named with or - # without the special characters stripped. For example, to specify - # the cache factor for `*stateGroupCache*` via an environment - # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`. - # - per_cache_factors: - #get_users_who_share_room_with_user: 2.0 + # A dictionary of cache name to cache factor for that individual + # cache. Overrides the global cache factor for a given cache. + # + # These can also be set through environment variables comprised + # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital + # letters and underscores. Setting by environment variable + # takes priority over setting through the config file. + # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0 + # + # Some caches have '*' and other characters that are not + # alphanumeric or underscores. These caches can be named with or + # without the special characters stripped. 
For example, to specify + # the cache factor for `*stateGroupCache*` via an environment + # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`. + # + per_cache_factors: + #get_users_who_share_room_with_user: 2.0 + + # Controls how long an entry can be in a cache without having been + # accessed before being evicted. Defaults to None, which means + # entries are never evicted based on time. + # + #expiry_time: 30m ## Database ## diff --git a/mypy.ini b/mypy.ini index c4ff0e6618..72ce932d73 100644 --- a/mypy.ini +++ b/mypy.ini @@ -75,6 +75,7 @@ files = synapse/util/daemonize.py, synapse/util/hash.py, synapse/util/iterutils.py, + synapse/util/linked_list.py, synapse/util/metrics.py, synapse/util/macaroons.py, synapse/util/module_loader.py, diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 8879136881..b30571fe49 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -21,7 +21,7 @@ import socket import sys import traceback import warnings -from typing import Awaitable, Callable, Iterable +from typing import TYPE_CHECKING, Awaitable, Callable, Iterable from cryptography.utils import CryptographyDeprecationWarning from typing_extensions import NoReturn @@ -41,10 +41,14 @@ from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import PreserveLoggingContext from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats +from synapse.util.caches.lrucache import setup_expire_lru_cache_entries from synapse.util.daemonize import daemonize_process from synapse.util.rlimit import change_resource_limit from synapse.util.versionstring import get_version_string +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) # list of tuples of function, args list, kwargs dict @@ -312,7 +316,7 @@ def refresh_certificate(hs): logger.info("Context factories updated.") -async def start(hs: "synapse.server.HomeServer"): +async def start(hs: "HomeServer"): """ Start a Synapse server or worker. @@ -365,6 +369,9 @@ async def start(hs: "synapse.server.HomeServer"): load_legacy_spam_checkers(hs) + # If we've configured an expiry time for caches, start the background job now. + setup_expire_lru_cache_entries(hs) + # It is now safe to start your Synapse. hs.start_listening() hs.get_datastore().db_pool.start_profiling() diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 23ca0c83c1..06fbd1166b 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -5,6 +5,7 @@ from synapse.config import ( api, appservice, auth, + cache, captcha, cas, consent, @@ -88,6 +89,7 @@ class RootConfig: tracer: tracer.TracerConfig redis: redis.RedisConfig modules: modules.ModulesConfig + caches: cache.CacheConfig federation: federation.FederationConfig config_classes: List = ... diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 91165ee1ce..7789b40323 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -116,35 +116,41 @@ class CacheConfig(Config): #event_cache_size: 10K caches: - # Controls the global cache factor, which is the default cache factor - # for all caches if a specific factor for that cache is not otherwise - # set. - # - # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment - # variable. Setting by environment variable takes priority over - # setting through the config file. - # - # Defaults to 0.5, which will half the size of all caches. 
- # - #global_factor: 1.0 - - # A dictionary of cache name to cache factor for that individual - # cache. Overrides the global cache factor for a given cache. - # - # These can also be set through environment variables comprised - # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital - # letters and underscores. Setting by environment variable - # takes priority over setting through the config file. - # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0 - # - # Some caches have '*' and other characters that are not - # alphanumeric or underscores. These caches can be named with or - # without the special characters stripped. For example, to specify - # the cache factor for `*stateGroupCache*` via an environment - # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`. - # - per_cache_factors: - #get_users_who_share_room_with_user: 2.0 + # Controls the global cache factor, which is the default cache factor + # for all caches if a specific factor for that cache is not otherwise + # set. + # + # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment + # variable. Setting by environment variable takes priority over + # setting through the config file. + # + # Defaults to 0.5, which will half the size of all caches. + # + #global_factor: 1.0 + + # A dictionary of cache name to cache factor for that individual + # cache. Overrides the global cache factor for a given cache. + # + # These can also be set through environment variables comprised + # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital + # letters and underscores. Setting by environment variable + # takes priority over setting through the config file. + # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0 + # + # Some caches have '*' and other characters that are not + # alphanumeric or underscores. These caches can be named with or + # without the special characters stripped. For example, to specify + # the cache factor for `*stateGroupCache*` via an environment + # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`. + # + per_cache_factors: + #get_users_who_share_room_with_user: 2.0 + + # Controls how long an entry can be in a cache without having been + # accessed before being evicted. Defaults to None, which means + # entries are never evicted based on time. + # + #expiry_time: 30m """ def read_config(self, config, **kwargs): @@ -200,6 +206,12 @@ class CacheConfig(Config): e.message # noqa: B306, DependencyException.message is a property ) + expiry_time = cache_config.get("expiry_time") + if expiry_time: + self.expiry_time_msec = self.parse_duration(expiry_time) + else: + self.expiry_time_msec = None + # Resize all caches (if necessary) with the new factors we've loaded self.resize_all_caches() diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index d89e9d9b1d..4b9d0433ff 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import threading +import weakref from functools import wraps from typing import ( + TYPE_CHECKING, Any, Callable, Collection, @@ -31,10 +34,19 @@ from typing import ( from typing_extensions import Literal +from twisted.internet import reactor + from synapse.config import cache as cache_config -from synapse.util import caches +from synapse.metrics.background_process_metrics import wrap_as_background_process +from synapse.util import Clock, caches from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry +from synapse.util.linked_list import ListNode + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) try: from pympler.asizeof import Asizer @@ -82,19 +94,126 @@ def enumerate_leaves(node, depth): yield m +P = TypeVar("P") + + +class _TimedListNode(ListNode[P]): + """A `ListNode` that tracks last access time.""" + + __slots__ = ["last_access_ts_secs"] + + def update_last_access(self, clock: Clock): + self.last_access_ts_secs = int(clock.time()) + + +# Whether to insert new cache entries to the global list. We only add to it if +# time based eviction is enabled. +USE_GLOBAL_LIST = False + +# A linked list of all cache entries, allowing efficient time based eviction. +GLOBAL_ROOT = ListNode["_Node"].create_root_node() + + +@wrap_as_background_process("LruCache._expire_old_entries") +async def _expire_old_entries(clock: Clock, expiry_seconds: int): + """Walks the global cache list to find cache entries that haven't been + accessed in the given number of seconds. + """ + + now = int(clock.time()) + node = GLOBAL_ROOT.prev_node + assert node is not None + + i = 0 + + logger.debug("Searching for stale caches") + + while node is not GLOBAL_ROOT: + # Only the root node isn't a `_TimedListNode`. + assert isinstance(node, _TimedListNode) + + if node.last_access_ts_secs > now - expiry_seconds: + break + + cache_entry = node.get_cache_entry() + next_node = node.prev_node + + # The node should always have a reference to a cache entry and a valid + # `prev_node`, as we only drop them when we remove the node from the + # list. + assert next_node is not None + assert cache_entry is not None + cache_entry.drop_from_cache() + + # If we do lots of work at once we yield to allow other stuff to happen. + if (i + 1) % 10000 == 0: + logger.debug("Waiting during drop") + await clock.sleep(0) + logger.debug("Waking during drop") + + node = next_node + + # If we've yielded then our current node may have been evicted, so we + # need to check that its still valid. + if node.prev_node is None: + break + + i += 1 + + logger.info("Dropped %d items from caches", i) + + +def setup_expire_lru_cache_entries(hs: "HomeServer"): + """Start a background job that expires all cache entries if they have not + been accessed for the given number of seconds. 
+ """ + if not hs.config.caches.expiry_time_msec: + return + + logger.info( + "Expiring LRU caches after %d seconds", hs.config.caches.expiry_time_msec / 1000 + ) + + global USE_GLOBAL_LIST + USE_GLOBAL_LIST = True + + clock = hs.get_clock() + clock.looping_call( + _expire_old_entries, 30 * 1000, clock, hs.config.caches.expiry_time_msec / 1000 + ) + + class _Node: - __slots__ = ["prev_node", "next_node", "key", "value", "callbacks", "memory"] + __slots__ = [ + "_list_node", + "_global_list_node", + "_cache", + "key", + "value", + "callbacks", + "memory", + ] def __init__( self, - prev_node, - next_node, + root: "ListNode[_Node]", key, value, + cache: "weakref.ReferenceType[LruCache]", + clock: Clock, callbacks: Collection[Callable[[], None]] = (), ): - self.prev_node = prev_node - self.next_node = next_node + self._list_node = ListNode.insert_after(self, root) + self._global_list_node = None + if USE_GLOBAL_LIST: + self._global_list_node = _TimedListNode.insert_after(self, GLOBAL_ROOT) + self._global_list_node.update_last_access(clock) + + # We store a weak reference to the cache object so that this _Node can + # remove itself from the cache. If the cache is dropped we ensure we + # remove our entries in the lists. + self._cache = cache + self.key = key self.value = value @@ -116,11 +235,16 @@ class _Node: self.memory = ( _get_size_of(key) + _get_size_of(value) + + _get_size_of(self._list_node, recurse=False) + _get_size_of(self.callbacks, recurse=False) + _get_size_of(self, recurse=False) ) self.memory += _get_size_of(self.memory, recurse=False) + if self._global_list_node: + self.memory += _get_size_of(self._global_list_node, recurse=False) + self.memory += _get_size_of(self._global_list_node.last_access_ts_secs) + def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None: """Add to stored list of callbacks, removing duplicates.""" @@ -147,6 +271,32 @@ class _Node: self.callbacks = None + def drop_from_cache(self) -> None: + """Drop this node from the cache. + + Ensures that the entry gets removed from the cache and that we get + removed from all lists. + """ + cache = self._cache() + if not cache or not cache.pop(self.key, None): + # `cache.pop` should call `drop_from_lists()`, unless this Node had + # already been removed from the cache. + self.drop_from_lists() + + def drop_from_lists(self) -> None: + """Remove this node from the cache lists.""" + self._list_node.remove_from_list() + + if self._global_list_node: + self._global_list_node.remove_from_list() + + def move_to_front(self, clock: Clock, cache_list_root: ListNode) -> None: + """Moves this node to the front of all the lists its in.""" + self._list_node.move_after(cache_list_root) + if self._global_list_node: + self._global_list_node.move_after(GLOBAL_ROOT) + self._global_list_node.update_last_access(clock) + class LruCache(Generic[KT, VT]): """ @@ -163,6 +313,7 @@ class LruCache(Generic[KT, VT]): size_callback: Optional[Callable] = None, metrics_collection_callback: Optional[Callable[[], None]] = None, apply_cache_factor_from_config: bool = True, + clock: Optional[Clock] = None, ): """ Args: @@ -188,6 +339,13 @@ class LruCache(Generic[KT, VT]): apply_cache_factor_from_config (bool): If true, `max_size` will be multiplied by a cache factor derived from the homeserver config """ + # Default `clock` to something sensible. Note that we rename it to + # `real_clock` so that mypy doesn't think its still `Optional`. 
+ if clock is None: + real_clock = Clock(reactor) + else: + real_clock = clock + cache = cache_type() self.cache = cache # Used for introspection. self.apply_cache_factor_from_config = apply_cache_factor_from_config @@ -219,17 +377,31 @@ class LruCache(Generic[KT, VT]): # this is exposed for access from outside this class self.metrics = metrics - list_root = _Node(None, None, None, None) - list_root.next_node = list_root - list_root.prev_node = list_root + # We create a single weakref to self here so that we don't need to keep + # creating more each time we create a `_Node`. + weak_ref_to_self = weakref.ref(self) + + list_root = ListNode[_Node].create_root_node() lock = threading.Lock() def evict(): while cache_len() > self.max_size: + # Get the last node in the list (i.e. the oldest node). todelete = list_root.prev_node - evicted_len = delete_node(todelete) - cache.pop(todelete.key, None) + + # The list root should always have a valid `prev_node` if the + # cache is not empty. + assert todelete is not None + + # The node should always have a reference to a cache entry, as + # we only drop the cache entry when we remove the node from the + # list. + node = todelete.get_cache_entry() + assert node is not None + + evicted_len = delete_node(node) + cache.pop(node.key, None) if metrics: metrics.inc_evictions(evicted_len) @@ -255,11 +427,7 @@ class LruCache(Generic[KT, VT]): self.len = synchronized(cache_len) def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()): - prev_node = list_root - next_node = prev_node.next_node - node = _Node(prev_node, next_node, key, value, callbacks) - prev_node.next_node = node - next_node.prev_node = node + node = _Node(list_root, key, value, weak_ref_to_self, real_clock, callbacks) cache[key] = node if size_callback: @@ -268,23 +436,11 @@ class LruCache(Generic[KT, VT]): if caches.TRACK_MEMORY_USAGE and metrics: metrics.inc_memory_usage(node.memory) - def move_node_to_front(node): - prev_node = node.prev_node - next_node = node.next_node - prev_node.next_node = next_node - next_node.prev_node = prev_node - prev_node = list_root - next_node = prev_node.next_node - node.prev_node = prev_node - node.next_node = next_node - prev_node.next_node = node - next_node.prev_node = node - - def delete_node(node): - prev_node = node.prev_node - next_node = node.next_node - prev_node.next_node = next_node - next_node.prev_node = prev_node + def move_node_to_front(node: _Node): + node.move_to_front(real_clock, list_root) + + def delete_node(node: _Node) -> int: + node.drop_from_lists() deleted_len = 1 if size_callback: @@ -411,10 +567,13 @@ class LruCache(Generic[KT, VT]): @synchronized def cache_clear() -> None: - list_root.next_node = list_root - list_root.prev_node = list_root for node in cache.values(): node.run_and_clear_callbacks() + node.drop_from_lists() + + assert list_root.next_node == list_root + assert list_root.prev_node == list_root + cache.clear() if size_callback: cached_cache_len[0] = 0 @@ -484,3 +643,11 @@ class LruCache(Generic[KT, VT]): self._on_resize() return True return False + + def __del__(self) -> None: + # We're about to be deleted, so we make sure to clear up all the nodes + # and run callbacks, etc. + # + # This happens e.g. in the sync code where we have an expiring cache of + # lru caches. 
+ self.clear() diff --git a/synapse/util/linked_list.py b/synapse/util/linked_list.py new file mode 100644 index 0000000000..a456b136f0 --- /dev/null +++ b/synapse/util/linked_list.py @@ -0,0 +1,150 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A circular doubly linked list implementation. +""" + +import threading +from typing import Generic, Optional, Type, TypeVar + +P = TypeVar("P") +LN = TypeVar("LN", bound="ListNode") + + +class ListNode(Generic[P]): + """A node in a circular doubly linked list, with an (optional) reference to + a cache entry. + + The reference should only be `None` for the root node or if the node has + been removed from the list. + """ + + # A lock to protect mutating the list prev/next pointers. + _LOCK = threading.Lock() + + # We don't use attrs here as in py3.6 you can't have `attr.s(slots=True)` + # and inherit from `Generic` for some reason + __slots__ = [ + "cache_entry", + "prev_node", + "next_node", + ] + + def __init__(self, cache_entry: Optional[P] = None) -> None: + self.cache_entry = cache_entry + self.prev_node: Optional[ListNode[P]] = None + self.next_node: Optional[ListNode[P]] = None + + @classmethod + def create_root_node(cls: Type["ListNode[P]"]) -> "ListNode[P]": + """Create a new linked list by creating a "root" node, which is a node + that has prev_node/next_node pointing to itself and no associated cache + entry. + """ + root = cls() + root.prev_node = root + root.next_node = root + return root + + @classmethod + def insert_after( + cls: Type[LN], + cache_entry: P, + node: "ListNode[P]", + ) -> LN: + """Create a new list node that is placed after the given node. + + Args: + cache_entry: The associated cache entry. + node: The existing node in the list to insert the new entry after. + """ + new_node = cls(cache_entry) + with cls._LOCK: + new_node._refs_insert_after(node) + return new_node + + def remove_from_list(self): + """Remove this node from the list.""" + with self._LOCK: + self._refs_remove_node_from_list() + + # We drop the reference to the cache entry to break the reference cycle + # between the list node and cache entry, allowing the two to be dropped + # immediately rather than at the next GC. + self.cache_entry = None + + def move_after(self, node: "ListNode"): + """Move this node from its current location in the list to after the + given node. + """ + with self._LOCK: + # We assert that both this node and the target node is still "alive". + assert self.prev_node + assert self.next_node + assert node.prev_node + assert node.next_node + + assert self is not node + + # Remove self from the list + self._refs_remove_node_from_list() + + # Insert self back into the list, after target node + self._refs_insert_after(node) + + def _refs_remove_node_from_list(self): + """Internal method to *just* remove the node from the list, without + e.g. clearing out the cache entry. + """ + if self.prev_node is None or self.next_node is None: + # We've already been removed from the list. 
+ return + + prev_node = self.prev_node + next_node = self.next_node + + prev_node.next_node = next_node + next_node.prev_node = prev_node + + # We set these to None so that we don't get circular references, + # allowing us to be dropped without having to go via the GC. + self.prev_node = None + self.next_node = None + + def _refs_insert_after(self, node: "ListNode"): + """Internal method to insert the node after the given node.""" + + # This method should only be called when we're not already in the list. + assert self.prev_node is None + assert self.next_node is None + + # We expect the given node to be in the list and thus have valid + # prev/next refs. + assert node.next_node + assert node.prev_node + + prev_node = node + next_node = node.next_node + + self.prev_node = prev_node + self.next_node = next_node + + prev_node.next_node = self + next_node.prev_node = self + + def get_cache_entry(self) -> Optional[P]: + """Get the cache entry, returns None if this is the root node (i.e. + cache_entry is None) or if the entry has been dropped. + """ + return self.cache_entry diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index 377904e72e..6578f3411e 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -15,7 +15,7 @@ from unittest.mock import Mock -from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.lrucache import LruCache, setup_expire_lru_cache_entries from synapse.util.caches.treecache import TreeCache from tests import unittest @@ -260,3 +260,47 @@ class LruCacheSizedTestCase(unittest.HomeserverTestCase): self.assertEquals(cache["key3"], [3]) self.assertEquals(cache["key4"], [4]) self.assertEquals(cache["key5"], [5, 6]) + + +class TimeEvictionTestCase(unittest.HomeserverTestCase): + """Test that time based eviction works correctly.""" + + def default_config(self): + config = super().default_config() + + config.setdefault("caches", {})["expiry_time"] = "30m" + + return config + + def test_evict(self): + setup_expire_lru_cache_entries(self.hs) + + cache = LruCache(5, clock=self.hs.get_clock()) + + # Check that we evict entries we haven't accessed for 30 minutes. + cache["key1"] = 1 + cache["key2"] = 2 + + self.reactor.advance(20 * 60) + + self.assertEqual(cache.get("key1"), 1) + + self.reactor.advance(20 * 60) + + # We have only touched `key1` in the last 30m, so we expect that to + # still be in the cache while `key2` should have been evicted. + self.assertEqual(cache.get("key1"), 1) + self.assertEqual(cache.get("key2"), None) + + # Check that re-adding an expired key works correctly. 
+ cache["key2"] = 3
+ self.assertEqual(cache.get("key2"), 3)
+
+ self.reactor.advance(20 * 60)
+
+ self.assertEqual(cache.get("key2"), 3)
+
+ self.reactor.advance(20 * 60)
+
+ self.assertEqual(cache.get("key1"), None)
+ self.assertEqual(cache.get("key2"), 3)
-- cgit 1.4.1 From d7a94a7dcc955e08bf6bc62b95e02965b304af7f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 6 Jul 2021 11:00:05 +0100 Subject: Add upgrade notes about disk space for events migration (#10314) --- CHANGES.md | 4 ++++ changelog.d/10314.bugfix | 1 + docs/upgrade.md | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10314.bugfix diff --git a/CHANGES.md b/CHANGES.md index 0c64d5bda6..a2fc423096 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +Synapse 1.38.0 (**UNRELEASED**)
===============================
This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade.md#upgrading-to-v1380) for more information.

Synapse 1.37.1 (2021-06-30)
===========================

diff --git a/changelog.d/10314.bugfix b/changelog.d/10314.bugfix new file mode 100644 index 0000000000..7ebda7cdc2 --- /dev/null +++ b/changelog.d/10314.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server. diff --git a/docs/upgrade.md b/docs/upgrade.md index a44960c2b8..011aadf638 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -84,7 +84,45 @@ process, for example: wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` - + + +# Upgrading to v1.38.0 + +## Re-indexing of `events` table on Postgres databases + +This release includes a database schema update which requires re-indexing one of
+the larger tables in the database, `events`. This could result in increased
+disk I/O for several hours or days after upgrading while the migration
+completes. Furthermore, because we have to keep the old indexes until the new
+indexes are ready, it could result in a significant, temporary, increase in
+disk space.
+
+To get a rough idea of the disk space required, check the current size of one
+of the indexes. For example, from a `psql` shell, run the following SQL:
+
+```sql
+SELECT pg_size_pretty(pg_relation_size('events_order_room'));
+```
+
+We need to rebuild **four** indexes, so you will need to multiply this result
+by four to give an estimate of the disk space required. For example, on one
+particular server:
+
+```
+synapse=# select pg_size_pretty(pg_relation_size('events_order_room'));
+ pg_size_pretty
+----------------
+ 288 MB
+(1 row)
+```
+
+On this server, it would be wise to ensure that at least 1152MB are free.
+
+The additional disk space will be freed once the migration completes.
+
+SQLite databases are unaffected by this change.
+
+ # Upgrading to v1.37.0 ## Deprecation of the current spam checker interface -- cgit 1.4.1 From c65067d67307de7688fa39246426370421e56452 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Jul 2021 13:02:37 +0100 Subject: Handle old staged inbound events (#10303) Events might be left in the staging area if the service was restarted while they were still unprocessed.
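The recovery pass added here is driven by a new storage method, `get_all_rooms_with_staged_incoming_events`, which lists the rooms that still have staged events so that processing can be restarted for each of them. In SQL terms it amounts to roughly the following (a sketch inferred from the `simple_select_onecol` call in the diff, not the literal statement Synapse emits):

```sql
-- List every room with at least one event waiting in the staging area.
SELECT DISTINCT room_id FROM federation_inbound_events_staging;
```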
Fixes #10295 --- changelog.d/10303.bugfix | 1 + synapse/federation/federation_server.py | 67 ++++++++++++++++++---- synapse/storage/databases/main/event_federation.py | 9 +++ 3 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 changelog.d/10303.bugfix diff --git a/changelog.d/10303.bugfix b/changelog.d/10303.bugfix new file mode 100644 index 0000000000..c0577c9f73 --- /dev/null +++ b/changelog.d/10303.bugfix @@ -0,0 +1 @@ +Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index b312d0b809..bf67d0f574 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -148,6 +148,41 @@ class FederationServer(FederationBase): self._room_prejoin_state_types = hs.config.api.room_prejoin_state + # Whether we have started handling old events in the staging area. + self._started_handling_of_staged_events = False + + @wrap_as_background_process("_handle_old_staged_events") + async def _handle_old_staged_events(self) -> None: + """Handle old staged events by fetching all rooms that have staged + events and start the processing of each of those rooms. + """ + + # Get all the rooms IDs with staged events. + room_ids = await self.store.get_all_rooms_with_staged_incoming_events() + + # We then shuffle them so that if there are multiple instances doing + # this work they're less likely to collide. + random.shuffle(room_ids) + + for room_id in room_ids: + room_version = await self.store.get_room_version(room_id) + + # Try and acquire the processing lock for the room, if we get it start a + # background process for handling the events in the room. + lock = await self.store.try_acquire_lock( + _INBOUND_EVENT_HANDLING_LOCK_NAME, room_id + ) + if lock: + logger.info("Handling old staged inbound events in %s", room_id) + self._process_incoming_pdus_in_room_inner( + room_id, + room_version, + lock, + ) + + # We pause a bit so that we don't start handling all rooms at once. + await self._clock.sleep(random.uniform(0, 0.1)) + async def on_backfill_request( self, origin: str, room_id: str, versions: List[str], limit: int ) -> Tuple[int, Dict[str, Any]]: @@ -166,6 +201,12 @@ class FederationServer(FederationBase): async def on_incoming_transaction( self, origin: str, transaction_data: JsonDict ) -> Tuple[int, Dict[str, Any]]: + # If we receive a transaction we should make sure that kick off handling + # any old events in the staging area. + if not self._started_handling_of_staged_events: + self._started_handling_of_staged_events = True + self._handle_old_staged_events() + # keep this as early as possible to make the calculated origin ts as # accurate as possible. request_time = self._clock.time_msec() @@ -882,25 +923,28 @@ class FederationServer(FederationBase): room_id: str, room_version: RoomVersion, lock: Lock, - latest_origin: str, - latest_event: EventBase, + latest_origin: Optional[str] = None, + latest_event: Optional[EventBase] = None, ) -> None: """Process events in the staging area for the given room. The latest_origin and latest_event args are the latest origin and event - received. + received (or None to simply pull the next event from the database). """ # The common path is for the event we just received be the only event in # the room, so instead of pulling the event out of the DB and parsing # the event we just pull out the next event ID and check if that matches. 
- next_origin, next_event_id = await self.store.get_next_staged_event_id_for_room( - room_id - ) - if next_origin == latest_origin and next_event_id == latest_event.event_id: - origin = latest_origin - event = latest_event - else: + if latest_event is not None and latest_origin is not None: + ( + next_origin, + next_event_id, + ) = await self.store.get_next_staged_event_id_for_room(room_id) + if next_origin != latest_origin or next_event_id != latest_event.event_id: + latest_origin = None + latest_event = None + + if latest_origin is None or latest_event is None: next = await self.store.get_next_staged_event_for_room( room_id, room_version ) @@ -908,6 +952,9 @@ class FederationServer(FederationBase): return origin, event = next + else: + origin = latest_origin + event = latest_event # We loop round until there are no more events in the room in the # staging area, or we fail to get the lock (which means another process diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 08d75b0d41..c4474df975 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1207,6 +1207,15 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas return origin, event + async def get_all_rooms_with_staged_incoming_events(self) -> List[str]: + """Get the room IDs of all events currently staged.""" + return await self.db_pool.simple_select_onecol( + table="federation_inbound_events_staging", + keyvalues={}, + retcol="DISTINCT room_id", + desc="get_all_rooms_with_staged_incoming_events", + ) + @wrap_as_background_process("_get_stats_for_federation_staging") async def _get_stats_for_federation_staging(self): """Update the prometheus metrics for the inbound federation staging area.""" -- cgit 1.4.1 From 6655ea558727138a80ea70fdbd9ee89b041f180f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Jul 2021 13:03:16 +0100 Subject: Add script for getting info about recently registered users (#10290) --- changelog.d/10290.feature | 1 + debian/changelog | 6 + debian/hash_password.1 | 42 +------ debian/hash_password.ronn | 2 +- debian/manpages | 1 + debian/matrix-synapse-py3.links | 1 + debian/register_new_matrix_user.1 | 37 +------ debian/register_new_matrix_user.ronn | 2 +- debian/synapse_port_db.1 | 59 +++------- debian/synapse_port_db.ronn | 8 +- debian/synapse_review_recent_signups.1 | 26 +++++ debian/synapse_review_recent_signups.ronn | 37 +++++++ debian/synctl.1 | 42 ++----- debian/synctl.ronn | 2 +- scripts/synapse_review_recent_signups | 19 ++++ synapse/_scripts/review_recent_signups.py | 175 ++++++++++++++++++++++++++++++ synapse/storage/database.py | 2 +- 17 files changed, 309 insertions(+), 153 deletions(-) create mode 100644 changelog.d/10290.feature create mode 100644 debian/synapse_review_recent_signups.1 create mode 100644 debian/synapse_review_recent_signups.ronn create mode 100755 scripts/synapse_review_recent_signups create mode 100644 synapse/_scripts/review_recent_signups.py diff --git a/changelog.d/10290.feature b/changelog.d/10290.feature new file mode 100644 index 0000000000..4e4c2e24ef --- /dev/null +++ b/changelog.d/10290.feature @@ -0,0 +1 @@ +Add script to print information about recently registered users. 
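At its core, the new `synapse_review_recent_signups` script runs one query against the `users` table and then decorates each match with email, room, and IP information; the full implementation appears in `synapse/_scripts/review_recent_signups.py` further down this patch. The central query, quoted from the script, is:

```sql
-- Recently registered, non-deactivated users; the placeholder is bound to
-- the cutoff timestamp derived from the --since option (default: 7 days).
SELECT name, creation_ts FROM users
WHERE ? <= creation_ts
    AND deactivated = 0
```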
diff --git a/debian/changelog b/debian/changelog index 35a0cddeaf..cafd03c6c1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.37.1ubuntu1) UNRELEASED; urgency=medium + + * Add synapse_review_recent_signups script + + -- Erik Johnston Thu, 01 Jul 2021 15:55:03 +0100 + matrix-synapse-py3 (1.37.1) stable; urgency=medium * New synapse release 1.37.1. diff --git a/debian/hash_password.1 b/debian/hash_password.1 index 383f452991..d64b91e7c8 100644 --- a/debian/hash_password.1 +++ b/debian/hash_password.1 @@ -1,90 +1,58 @@ -.\" generated with Ronn/v0.7.3 -.\" http://github.com/rtomayko/ronn/tree/0.7.3 -. -.TH "HASH_PASSWORD" "1" "February 2017" "" "" -. +.\" generated with Ronn-NG/v0.8.0 +.\" http://github.com/apjanke/ronn-ng/tree/0.8.0 +.TH "HASH_PASSWORD" "1" "July 2021" "" "" .SH "NAME" \fBhash_password\fR \- Calculate the hash of a new password, so that passwords can be reset -. .SH "SYNOPSIS" \fBhash_password\fR [\fB\-p\fR|\fB\-\-password\fR [password]] [\fB\-c\fR|\fB\-\-config\fR \fIfile\fR] -. .SH "DESCRIPTION" \fBhash_password\fR calculates the hash of a supplied password using bcrypt\. -. .P \fBhash_password\fR takes a password as an parameter either on the command line or the \fBSTDIN\fR if not supplied\. -. .P It accepts an YAML file which can be used to specify parameters like the number of rounds for bcrypt and password_config section having the pepper value used for the hashing\. By default \fBbcrypt_rounds\fR is set to \fB10\fR\. -. .P The hashed password is written on the \fBSTDOUT\fR\. -. .SH "FILES" A sample YAML file accepted by \fBhash_password\fR is described below: -. .P bcrypt_rounds: 17 password_config: pepper: "random hashing pepper" -. .SH "OPTIONS" -. .TP \fB\-p\fR, \fB\-\-password\fR Read the password form the command line if [password] is supplied\. If not, prompt the user and read the password form the \fBSTDIN\fR\. It is not recommended to type the password on the command line directly\. Use the STDIN instead\. -. .TP \fB\-c\fR, \fB\-\-config\fR Read the supplied YAML \fIfile\fR containing the options \fBbcrypt_rounds\fR and the \fBpassword_config\fR section containing the \fBpepper\fR value\. -. .SH "EXAMPLES" Hash from the command line: -. .IP "" 4 -. .nf - $ hash_password \-p "p@ssw0rd" $2b$12$VJNqWQYfsWTEwcELfoSi4Oa8eA17movHqqi8\.X8fWFpum7SxZ9MFe -. .fi -. .IP "" 0 -. .P Hash from the STDIN: -. .IP "" 4 -. .nf - $ hash_password Password: Confirm password: $2b$12$AszlvfmJl2esnyhmn8m/kuR2tdXgROWtWxnX\.rcuAbM8ErLoUhybG -. .fi -. .IP "" 0 -. .P Using a config file: -. .IP "" 4 -. .nf - $ hash_password \-c config\.yml Password: Confirm password: $2b$12$CwI\.wBNr\.w3kmiUlV3T5s\.GT2wH7uebDCovDrCOh18dFedlANK99O -. .fi -. .IP "" 0 -. .SH "COPYRIGHT" -This man page was written by Rahul De <\fIrahulde@swecha\.net\fR> for Debian GNU/Linux distribution\. -. +This man page was written by Rahul De <\fI\%mailto:rahulde@swecha\.net\fR> for Debian GNU/Linux distribution\. .SH "SEE ALSO" -synctl(1), synapse_port_db(1), register_new_matrix_user(1) +synctl(1), synapse_port_db(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/debian/hash_password.ronn b/debian/hash_password.ronn index 0b2afa7374..eeb354602d 100644 --- a/debian/hash_password.ronn +++ b/debian/hash_password.ronn @@ -66,4 +66,4 @@ for Debian GNU/Linux distribution. 
## SEE ALSO -synctl(1), synapse_port_db(1), register_new_matrix_user(1) +synctl(1), synapse_port_db(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/debian/manpages b/debian/manpages index 2c30583530..4b13f52853 100644 --- a/debian/manpages +++ b/debian/manpages @@ -1,4 +1,5 @@ debian/hash_password.1 debian/register_new_matrix_user.1 debian/synapse_port_db.1 +debian/synapse_review_recent_signups.1 debian/synctl.1 diff --git a/debian/matrix-synapse-py3.links b/debian/matrix-synapse-py3.links index bf19efa562..53e2965418 100644 --- a/debian/matrix-synapse-py3.links +++ b/debian/matrix-synapse-py3.links @@ -1,4 +1,5 @@ opt/venvs/matrix-synapse/bin/hash_password usr/bin/hash_password opt/venvs/matrix-synapse/bin/register_new_matrix_user usr/bin/register_new_matrix_user opt/venvs/matrix-synapse/bin/synapse_port_db usr/bin/synapse_port_db +opt/venvs/matrix-synapse/bin/synapse_review_recent_signups usr/bin/synapse_review_recent_signups opt/venvs/matrix-synapse/bin/synctl usr/bin/synctl diff --git a/debian/register_new_matrix_user.1 b/debian/register_new_matrix_user.1 index 99156a7354..57bfc4e024 100644 --- a/debian/register_new_matrix_user.1 +++ b/debian/register_new_matrix_user.1 @@ -1,72 +1,47 @@ -.\" generated with Ronn/v0.7.3 -.\" http://github.com/rtomayko/ronn/tree/0.7.3 -. -.TH "REGISTER_NEW_MATRIX_USER" "1" "February 2017" "" "" -. +.\" generated with Ronn-NG/v0.8.0 +.\" http://github.com/apjanke/ronn-ng/tree/0.8.0 +.TH "REGISTER_NEW_MATRIX_USER" "1" "July 2021" "" "" .SH "NAME" \fBregister_new_matrix_user\fR \- Used to register new users with a given home server when registration has been disabled -. .SH "SYNOPSIS" -\fBregister_new_matrix_user\fR options\.\.\. -. +\fBregister_new_matrix_user\fR options\|\.\|\.\|\. .SH "DESCRIPTION" \fBregister_new_matrix_user\fR registers new users with a given home server when registration has been disabled\. For this to work, the home server must be configured with the \'registration_shared_secret\' option set\. -. .P This accepts the user credentials like the username, password, is user an admin or not and registers the user onto the homeserver database\. Also, a YAML file containing the shared secret can be provided\. If not, the shared secret can be provided via the command line\. -. .P By default it assumes the home server URL to be \fBhttps://localhost:8448\fR\. This can be changed via the \fBserver_url\fR command line option\. -. .SH "FILES" A sample YAML file accepted by \fBregister_new_matrix_user\fR is described below: -. .IP "" 4 -. .nf - registration_shared_secret: "s3cr3t" -. .fi -. .IP "" 0 -. .SH "OPTIONS" -. .TP \fB\-u\fR, \fB\-\-user\fR Local part of the new user\. Will prompt if omitted\. -. .TP \fB\-p\fR, \fB\-\-password\fR New password for user\. Will prompt if omitted\. Supplying the password on the command line is not recommended\. Use the STDIN instead\. -. .TP \fB\-a\fR, \fB\-\-admin\fR Register new user as an admin\. Will prompt if omitted\. -. .TP \fB\-c\fR, \fB\-\-config\fR Path to server config file containing the shared secret\. -. .TP \fB\-k\fR, \fB\-\-shared\-secret\fR Shared secret as defined in server config file\. This is an optional parameter as it can be also supplied via the YAML file\. -. .TP \fBserver_url\fR URL of the home server\. Defaults to \'https://localhost:8448\'\. -. .SH "EXAMPLES" -. .nf - $ register_new_matrix_user \-u user1 \-p p@ssword \-a \-c config\.yaml -. .fi -. 
.SH "COPYRIGHT" -This man page was written by Rahul De <\fIrahulde@swecha\.net\fR> for Debian GNU/Linux distribution\. -. +This man page was written by Rahul De <\fI\%mailto:rahulde@swecha\.net\fR> for Debian GNU/Linux distribution\. .SH "SEE ALSO" -synctl(1), synapse_port_db(1), hash_password(1) +synctl(1), synapse_port_db(1), hash_password(1), synapse_review_recent_signups(1) diff --git a/debian/register_new_matrix_user.ronn b/debian/register_new_matrix_user.ronn index 4c22e74dde..0410b1f4cd 100644 --- a/debian/register_new_matrix_user.ronn +++ b/debian/register_new_matrix_user.ronn @@ -58,4 +58,4 @@ for Debian GNU/Linux distribution. ## SEE ALSO -synctl(1), synapse_port_db(1), hash_password(1) +synctl(1), synapse_port_db(1), hash_password(1), synapse_review_recent_signups(1) diff --git a/debian/synapse_port_db.1 b/debian/synapse_port_db.1 index 4e6bc04827..0e7e20001c 100644 --- a/debian/synapse_port_db.1 +++ b/debian/synapse_port_db.1 @@ -1,83 +1,56 @@ -.\" generated with Ronn/v0.7.3 -.\" http://github.com/rtomayko/ronn/tree/0.7.3 -. -.TH "SYNAPSE_PORT_DB" "1" "February 2017" "" "" -. +.\" generated with Ronn-NG/v0.8.0 +.\" http://github.com/apjanke/ronn-ng/tree/0.8.0 +.TH "SYNAPSE_PORT_DB" "1" "July 2021" "" "" .SH "NAME" \fBsynapse_port_db\fR \- A script to port an existing synapse SQLite database to a new PostgreSQL database\. -. .SH "SYNOPSIS" \fBsynapse_port_db\fR [\-v] \-\-sqlite\-database=\fIdbfile\fR \-\-postgres\-config=\fIyamlconfig\fR [\-\-curses] [\-\-batch\-size=\fIbatch\-size\fR] -. .SH "DESCRIPTION" \fBsynapse_port_db\fR ports an existing synapse SQLite database to a new PostgreSQL database\. -. .P SQLite database is specified with \fB\-\-sqlite\-database\fR option and PostgreSQL configuration required to connect to PostgreSQL database is provided using \fB\-\-postgres\-config\fR configuration\. The configuration is specified in YAML format\. -. .SH "OPTIONS" -. .TP \fB\-v\fR Print log messages in \fBdebug\fR level instead of \fBinfo\fR level\. -. .TP \fB\-\-sqlite\-database\fR The snapshot of the SQLite database file\. This must not be currently used by a running synapse server\. -. .TP \fB\-\-postgres\-config\fR The database config file for the PostgreSQL database\. -. .TP \fB\-\-curses\fR Display a curses based progress UI\. -. .SH "CONFIG FILE" The postgres configuration file must be a valid YAML file with the following options\. -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBdatabase\fR: Database configuration section\. This section header can be ignored and the options below may be specified as top level keys\. -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBname\fR: Connector to use when connecting to the database\. This value must be \fBpsycopg2\fR\. -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBargs\fR: DB API 2\.0 compatible arguments to send to the \fBpsycopg2\fR module\. -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBdbname\fR \- the database name -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBuser\fR \- user name used to authenticate -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBpassword\fR \- password used to authenticate -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBhost\fR \- database host address (defaults to UNIX socket if not provided) -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBport\fR \- connection port number (defaults to 5432 if not provided) -. .IP "" 0 -. -.IP "\(bu" 4 +.IP "\[ci]" 4 \fBsynchronous_commit\fR: Optional\. Default is True\. If the value is \fBFalse\fR, enable asynchronous commit and don\'t wait for the server to call fsync before ending the transaction\. 
See: https://www\.postgresql\.org/docs/current/static/wal\-async\-commit\.html -. .IP "" 0 -. .IP "" 0 -. .P Following example illustrates the configuration file format\. -. .IP "" 4 -. .nf - database: name: psycopg2 args: @@ -86,13 +59,9 @@ database: password: ORohmi9Eet=ohphi host: localhost synchronous_commit: false -. .fi -. .IP "" 0 -. .SH "COPYRIGHT" -This man page was written by Sunil Mohan Adapa <\fIsunil@medhas\.org\fR> for Debian GNU/Linux distribution\. -. +This man page was written by Sunil Mohan Adapa <\fI\%mailto:sunil@medhas\.org\fR> for Debian GNU/Linux distribution\. .SH "SEE ALSO" -synctl(1), hash_password(1), register_new_matrix_user(1) +synctl(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/debian/synapse_port_db.ronn b/debian/synapse_port_db.ronn index fcb32ebd0d..e167af2ba4 100644 --- a/debian/synapse_port_db.ronn +++ b/debian/synapse_port_db.ronn @@ -47,7 +47,7 @@ following options. * `args`: DB API 2.0 compatible arguments to send to the `psycopg2` module. - * `dbname` - the database name + * `dbname` - the database name * `user` - user name used to authenticate @@ -58,7 +58,7 @@ following options. * `port` - connection port number (defaults to 5432 if not provided) - + * `synchronous_commit`: Optional. Default is True. If the value is `False`, enable @@ -76,7 +76,7 @@ Following example illustrates the configuration file format. password: ORohmi9Eet=ohphi host: localhost synchronous_commit: false - + ## COPYRIGHT This man page was written by Sunil Mohan Adapa <> for @@ -84,4 +84,4 @@ Debian GNU/Linux distribution. ## SEE ALSO -synctl(1), hash_password(1), register_new_matrix_user(1) +synctl(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/debian/synapse_review_recent_signups.1 b/debian/synapse_review_recent_signups.1 new file mode 100644 index 0000000000..2976c085f9 --- /dev/null +++ b/debian/synapse_review_recent_signups.1 @@ -0,0 +1,26 @@ +.\" generated with Ronn-NG/v0.8.0 +.\" http://github.com/apjanke/ronn-ng/tree/0.8.0 +.TH "SYNAPSE_REVIEW_RECENT_SIGNUPS" "1" "July 2021" "" "" +.SH "NAME" +\fBsynapse_review_recent_signups\fR \- Print users that have recently registered on Synapse +.SH "SYNOPSIS" +\fBsynapse_review_recent_signups\fR \fB\-c\fR|\fB\-\-config\fR \fIfile\fR [\fB\-s\fR|\fB\-\-since\fR \fIperiod\fR] [\fB\-e\fR|\fB\-\-exclude\-emails\fR] [\fB\-u\fR|\fB\-\-only\-users\fR] +.SH "DESCRIPTION" +\fBsynapse_review_recent_signups\fR prints out recently registered users on a Synapse server, as well as some basic information about the user\. +.P +\fBsynapse_review_recent_signups\fR must be supplied with the config of the Synapse server, so that it can fetch the database config and connect to the database\. +.SH "OPTIONS" +.TP +\fB\-c\fR, \fB\-\-config\fR +The config file(s) used by the Synapse server\. +.TP +\fB\-s\fR, \fB\-\-since\fR +How far back to search for newly registered users\. Defaults to 7d, i\.e\. up to seven days in the past\. Valid units are \'s\', \'m\', \'h\', \'d\', \'w\', or \'y\'\. +.TP +\fB\-e\fR, \fB\-\-exclude\-emails\fR +Do not print out users that have validated emails associated with their account\. 
+.TP +\fB\-u\fR, \fB\-\-only\-users\fR +Only print out the user IDs of recently registered users, without any additional information +.SH "SEE ALSO" +synctl(1), synapse_port_db(1), register_new_matrix_user(1), hash_password(1) diff --git a/debian/synapse_review_recent_signups.ronn b/debian/synapse_review_recent_signups.ronn new file mode 100644 index 0000000000..77f2b040b9 --- /dev/null +++ b/debian/synapse_review_recent_signups.ronn @@ -0,0 +1,37 @@ +synapse_review_recent_signups(1) -- Print users that have recently registered on Synapse +======================================================================================== + +## SYNOPSIS + +`synapse_review_recent_signups` `-c`|`--config` [`-s`|`--since` ] [`-e`|`--exclude-emails`] [`-u`|`--only-users`] + +## DESCRIPTION + +**synapse_review_recent_signups** prints out recently registered users on a +Synapse server, as well as some basic information about the user. + +`synapse_review_recent_signups` must be supplied with the config of the Synapse +server, so that it can fetch the database config and connect to the database. + + +## OPTIONS + + * `-c`, `--config`: + The config file(s) used by the Synapse server. + + * `-s`, `--since`: + How far back to search for newly registered users. Defaults to 7d, i.e. up + to seven days in the past. Valid units are 's', 'm', 'h', 'd', 'w', or 'y'. + + * `-e`, `--exclude-emails`: + Do not print out users that have validated emails associated with their + account. + + * `-u`, `--only-users`: + Only print out the user IDs of recently registered users, without any + additional information + + +## SEE ALSO + +synctl(1), synapse_port_db(1), register_new_matrix_user(1), hash_password(1) diff --git a/debian/synctl.1 b/debian/synctl.1 index af58c8d224..2fdd770f09 100644 --- a/debian/synctl.1 +++ b/debian/synctl.1 @@ -1,63 +1,41 @@ -.\" generated with Ronn/v0.7.3 -.\" http://github.com/rtomayko/ronn/tree/0.7.3 -. -.TH "SYNCTL" "1" "February 2017" "" "" -. +.\" generated with Ronn-NG/v0.8.0 +.\" http://github.com/apjanke/ronn-ng/tree/0.8.0 +.TH "SYNCTL" "1" "July 2021" "" "" .SH "NAME" \fBsynctl\fR \- Synapse server control interface -. .SH "SYNOPSIS" Start, stop or restart synapse server\. -. .P \fBsynctl\fR {start|stop|restart} [configfile] [\-w|\-\-worker=\fIWORKERCONFIG\fR] [\-a|\-\-all\-processes=\fIWORKERCONFIGDIR\fR] -. .SH "DESCRIPTION" \fBsynctl\fR can be used to start, stop or restart Synapse server\. The control operation can be done on all processes or a single worker process\. -. .SH "OPTIONS" -. .TP \fBaction\fR The value of action should be one of \fBstart\fR, \fBstop\fR or \fBrestart\fR\. -. .TP \fBconfigfile\fR Optional path of the configuration file to use\. Default value is \fBhomeserver\.yaml\fR\. The configuration file must exist for the operation to succeed\. -. .TP \fB\-w\fR, \fB\-\-worker\fR: -. -.IP -Perform start, stop or restart operations on a single worker\. Incompatible with \fB\-a\fR|\fB\-\-all\-processes\fR\. Value passed must be a valid worker\'s configuration file\. -. + .TP \fB\-a\fR, \fB\-\-all\-processes\fR: -. -.IP -Perform start, stop or restart operations on all the workers in the given directory and the main synapse process\. Incompatible with \fB\-w\fR|\fB\-\-worker\fR\. Value passed must be a directory containing valid work configuration files\. All files ending with \fB\.yaml\fR extension shall be considered as configuration files and all other files in the directory are ignored\. -. + .SH "CONFIGURATION FILE" Configuration file may be generated as follows: -. 
.IP "" 4 -. .nf - $ python \-m synapse\.app\.homeserver \-c config\.yaml \-\-generate\-config \-\-server\-name= -. .fi -. .IP "" 0 -. .SH "ENVIRONMENT" -. .TP \fBSYNAPSE_CACHE_FACTOR\fR -Synapse\'s architecture is quite RAM hungry currently \- a lot of recent room data and metadata is deliberately cached in RAM in order to speed up common requests\. This will be improved in future, but for now the easiest way to either reduce the RAM usage (at the risk of slowing things down) is to set the SYNAPSE_CACHE_FACTOR environment variable\. Roughly speaking, a SYNAPSE_CACHE_FACTOR of 1\.0 will max out at around 3\-4GB of resident memory \- this is what we currently run the matrix\.org on\. The default setting is currently 0\.1, which is probably around a ~700MB footprint\. You can dial it down further to 0\.02 if desired, which targets roughly ~512MB\. Conversely you can dial it up if you need performance for lots of users and have a box with a lot of RAM\. -. +Synapse\'s architecture is quite RAM hungry currently \- we deliberately cache a lot of recent room data and metadata in RAM in order to speed up common requests\. We\'ll improve this in the future, but for now the easiest way to either reduce the RAM usage (at the risk of slowing things down) is to set the almost\-undocumented \fBSYNAPSE_CACHE_FACTOR\fR environment variable\. The default is 0\.5, which can be decreased to reduce RAM usage in memory constrained enviroments, or increased if performance starts to degrade\. +.IP +However, degraded performance due to a low cache factor, common on machines with slow disks, often leads to explosions in memory use due backlogged requests\. In this case, reducing the cache factor will make things worse\. Instead, try increasing it drastically\. 2\.0 is a good starting value\. .SH "COPYRIGHT" -This man page was written by Sunil Mohan Adapa <\fIsunil@medhas\.org\fR> for Debian GNU/Linux distribution\. -. +This man page was written by Sunil Mohan Adapa <\fI\%mailto:sunil@medhas\.org\fR> for Debian GNU/Linux distribution\. .SH "SEE ALSO" -synapse_port_db(1), hash_password(1), register_new_matrix_user(1) +synapse_port_db(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/debian/synctl.ronn b/debian/synctl.ronn index 10cbda988f..eca6a16815 100644 --- a/debian/synctl.ronn +++ b/debian/synctl.ronn @@ -68,4 +68,4 @@ Debian GNU/Linux distribution. ## SEE ALSO -synapse_port_db(1), hash_password(1), register_new_matrix_user(1) +synapse_port_db(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1) diff --git a/scripts/synapse_review_recent_signups b/scripts/synapse_review_recent_signups new file mode 100755 index 0000000000..a36d46e14c --- /dev/null +++ b/scripts/synapse_review_recent_signups @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from synapse._scripts.review_recent_signups import main + +if __name__ == "__main__": + main() diff --git a/synapse/_scripts/review_recent_signups.py b/synapse/_scripts/review_recent_signups.py new file mode 100644 index 0000000000..01dc0c4237 --- /dev/null +++ b/synapse/_scripts/review_recent_signups.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import sys +import time +from datetime import datetime +from typing import List + +import attr + +from synapse.config._base import RootConfig, find_config_files, read_config_files +from synapse.config.database import DatabaseConfig +from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn +from synapse.storage.engines import create_engine + + +class ReviewConfig(RootConfig): + "A config class that just pulls out the database config" + config_classes = [DatabaseConfig] + + +@attr.s(auto_attribs=True) +class UserInfo: + user_id: str + creation_ts: int + emails: List[str] = attr.Factory(list) + private_rooms: List[str] = attr.Factory(list) + public_rooms: List[str] = attr.Factory(list) + ips: List[str] = attr.Factory(list) + + +def get_recent_users(txn: LoggingTransaction, since_ms: int) -> List[UserInfo]: + """Fetches recently registered users and some info on them.""" + + sql = """ + SELECT name, creation_ts FROM users + WHERE + ? <= creation_ts + AND deactivated = 0 + """ + + txn.execute(sql, (since_ms / 1000,)) + + user_infos = [UserInfo(user_id, creation_ts) for user_id, creation_ts in txn] + + for user_info in user_infos: + user_info.emails = DatabasePool.simple_select_onecol_txn( + txn, + table="user_threepids", + keyvalues={"user_id": user_info.user_id, "medium": "email"}, + retcol="address", + ) + + sql = """ + SELECT room_id, canonical_alias, name, join_rules + FROM local_current_membership + INNER JOIN room_stats_state USING (room_id) + WHERE user_id = ? AND membership = 'join' + """ + + txn.execute(sql, (user_info.user_id,)) + for room_id, canonical_alias, name, join_rules in txn: + if join_rules == "public": + user_info.public_rooms.append(canonical_alias or name or room_id) + else: + user_info.private_rooms.append(canonical_alias or name or room_id) + + user_info.ips = DatabasePool.simple_select_onecol_txn( + txn, + table="user_ips", + keyvalues={"user_id": user_info.user_id}, + retcol="ip", + ) + + return user_infos + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-c", + "--config-path", + action="append", + metavar="CONFIG_FILE", + help="The config files for Synapse.", + required=True, + ) + parser.add_argument( + "-s", + "--since", + metavar="duration", + help="Specify how far back to review user registrations for, defaults to 7d (i.e. 
7 days).", + default="7d", + ) + parser.add_argument( + "-e", + "--exclude-emails", + action="store_true", + help="Exclude users that have validated email addresses", + ) + parser.add_argument( + "-u", + "--only-users", + action="store_true", + help="Only print user IDs that match.", + ) + + config = ReviewConfig() + + config_args = parser.parse_args(sys.argv[1:]) + config_files = find_config_files(search_paths=config_args.config_path) + config_dict = read_config_files(config_files) + config.parse_config_dict( + config_dict, + ) + + since_ms = time.time() * 1000 - config.parse_duration(config_args.since) + exclude_users_with_email = config_args.exclude_emails + include_context = not config_args.only_users + + for database_config in config.database.databases: + if "main" in database_config.databases: + break + + engine = create_engine(database_config.config) + + with make_conn(database_config, engine, "review_recent_signups") as db_conn: + user_infos = get_recent_users(db_conn.cursor(), since_ms) + + for user_info in user_infos: + if exclude_users_with_email and user_info.emails: + continue + + if include_context: + print_public_rooms = "" + if user_info.public_rooms: + print_public_rooms = "(" + ", ".join(user_info.public_rooms[:3]) + + if len(user_info.public_rooms) > 3: + print_public_rooms += ", ..." + + print_public_rooms += ")" + + print("# Created:", datetime.fromtimestamp(user_info.creation_ts)) + print("# Email:", ", ".join(user_info.emails) or "None") + print("# IPs:", ", ".join(user_info.ips)) + print( + "# Number joined public rooms:", + len(user_info.public_rooms), + print_public_rooms, + ) + print("# Number joined private rooms:", len(user_info.private_rooms)) + print("#") + + print(user_info.user_id) + + if include_context: + print() + + +if __name__ == "__main__": + main() diff --git a/synapse/storage/database.py b/synapse/storage/database.py index d470cdacde..33c42cf95a 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -111,7 +111,7 @@ def make_conn( db_config: DatabaseConnectionConfig, engine: BaseDatabaseEngine, default_txn_name: str, -) -> Connection: +) -> "LoggingDatabaseConnection": """Make a new connection to the database and return it. Returns: -- cgit 1.4.1 From bcb0962a7250d6c1430ad42f5ed234ffea8f2468 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 6 Jul 2021 14:08:53 +0200 Subject: Fix deactivate a user if he does not have a profile (#10252) --- changelog.d/10252.bugfix | 1 + synapse/storage/databases/main/profile.py | 8 +-- tests/rest/admin/test_user.py | 86 ++++++++++++++++++++++++------- 3 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 changelog.d/10252.bugfix diff --git a/changelog.d/10252.bugfix b/changelog.d/10252.bugfix new file mode 100644 index 0000000000..c8ddd14528 --- /dev/null +++ b/changelog.d/10252.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.26.0 where only users who have set profile information could be deactivated with erasure enabled. 
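
Aside: the diff that follows fixes the deactivation bug by swapping `simple_update_one` (an UPDATE, which fails when the user has no `profiles` row) for `simple_upsert` (which inserts the row if it is missing). A minimal sketch of the two semantics, assuming nothing but plain Python: a dict stands in for the `profiles` table, and the two functions are simplified stand-ins for Synapse's real `DatabasePool` methods of the same names, not the actual implementation.

from typing import Dict, Optional

# Toy stand-in for the `profiles` table, keyed by user_id.
profiles: Dict[str, Dict[str, Optional[str]]] = {}


def simple_update_one(user_id: str, displayname: Optional[str]) -> None:
    # UPDATE-style write: raises if no matching row exists. This is what made
    # erasure fail for users who never set any profile information.
    if user_id not in profiles:
        raise RuntimeError(f"no profiles row for {user_id}")
    profiles[user_id]["displayname"] = displayname


def simple_upsert(user_id: str, displayname: Optional[str]) -> None:
    # UPSERT-style write: creates the row when it is missing, then updates it.
    row = profiles.setdefault(user_id, {"displayname": None, "avatar_url": None})
    row["displayname"] = displayname


# Deactivation with erasure blanks the display name; with the upsert this now
# succeeds even though "@user:test" has no pre-existing row.
simple_upsert("@user:test", None)
assert profiles["@user:test"]["displayname"] is None

The same reasoning applies to `set_profile_avatar_url`, which is changed identically below.
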
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index 9b4e95e134..ba7075caa5 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -73,20 +73,20 @@ class ProfileWorkerStore(SQLBaseStore): async def set_profile_displayname( self, user_localpart: str, new_displayname: Optional[str] ) -> None: - await self.db_pool.simple_update_one( + await self.db_pool.simple_upsert( table="profiles", keyvalues={"user_id": user_localpart}, - updatevalues={"displayname": new_displayname}, + values={"displayname": new_displayname}, desc="set_profile_displayname", ) async def set_profile_avatar_url( self, user_localpart: str, new_avatar_url: Optional[str] ) -> None: - await self.db_pool.simple_update_one( + await self.db_pool.simple_upsert( table="profiles", keyvalues={"user_id": user_localpart}, - updatevalues={"avatar_url": new_avatar_url}, + values={"avatar_url": new_avatar_url}, desc="set_profile_avatar_url", ) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index a34d051734..4fccce34fd 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -939,7 +939,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): """ channel = self.make_request("POST", self.url, b"{}") - self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) def test_requester_is_not_admin(self): @@ -950,7 +950,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): channel = self.make_request("POST", url, access_token=self.other_user_token) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual("You are not a server admin", channel.json_body["error"]) channel = self.make_request( @@ -960,7 +960,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): content=b"{}", ) - self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual("You are not a server admin", channel.json_body["error"]) def test_user_does_not_exist(self): @@ -990,7 +990,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) - self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual(Codes.BAD_JSON, channel.json_body["errcode"]) def test_user_is_not_local(self): @@ -1006,7 +1006,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): def test_deactivate_user_erase_true(self): """ - Test deactivating an user and set `erase` to `true` + Test deactivating a user and set `erase` to `true` """ # Get user @@ -1016,24 +1016,22 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertEqual(False, channel.json_body["deactivated"]) self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"]) self.assertEqual("mxc://servername/mediaid", channel.json_body["avatar_url"]) self.assertEqual("User1", channel.json_body["displayname"]) - # 
Deactivate user - body = json.dumps({"erase": True}) - + # Deactivate and erase user channel = self.make_request( "POST", self.url, access_token=self.admin_user_tok, - content=body.encode(encoding="utf_8"), + content={"erase": True}, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(200, channel.code, msg=channel.json_body) # Get user channel = self.make_request( @@ -1042,7 +1040,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertEqual(True, channel.json_body["deactivated"]) self.assertEqual(0, len(channel.json_body["threepids"])) @@ -1053,7 +1051,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): def test_deactivate_user_erase_false(self): """ - Test deactivating an user and set `erase` to `false` + Test deactivating a user and set `erase` to `false` """ # Get user @@ -1063,7 +1061,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertEqual(False, channel.json_body["deactivated"]) self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"]) @@ -1071,13 +1069,11 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): self.assertEqual("User1", channel.json_body["displayname"]) # Deactivate user - body = json.dumps({"erase": False}) - channel = self.make_request( "POST", self.url, access_token=self.admin_user_tok, - content=body.encode(encoding="utf_8"), + content={"erase": False}, ) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) @@ -1089,7 +1085,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) - self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertEqual(True, channel.json_body["deactivated"]) self.assertEqual(0, len(channel.json_body["threepids"])) @@ -1098,6 +1094,60 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase): self._is_erased("@user:test", False) + def test_deactivate_user_erase_true_no_profile(self): + """ + Test deactivating a user and set `erase` to `true` + if user has no profile information (stored in the database table `profiles`). + """ + + # Users normally have an entry in `profiles`, but occasionally they are created without one. + # To test deactivation for users without a profile, we delete the profile information for our user. 
+ self.get_success( + self.store.db_pool.simple_delete_one( + table="profiles", keyvalues={"user_id": "user"} + ) + ) + + # Get user + channel = self.make_request( + "GET", + self.url_other_user, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual("@user:test", channel.json_body["name"]) + self.assertEqual(False, channel.json_body["deactivated"]) + self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"]) + self.assertIsNone(channel.json_body["avatar_url"]) + self.assertIsNone(channel.json_body["displayname"]) + + # Deactivate and erase user + channel = self.make_request( + "POST", + self.url, + access_token=self.admin_user_tok, + content={"erase": True}, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + + # Get user + channel = self.make_request( + "GET", + self.url_other_user, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual("@user:test", channel.json_body["name"]) + self.assertEqual(True, channel.json_body["deactivated"]) + self.assertEqual(0, len(channel.json_body["threepids"])) + self.assertIsNone(channel.json_body["avatar_url"]) + self.assertIsNone(channel.json_body["displayname"]) + + self._is_erased("@user:test", True) + def _is_erased(self, user_id: str, expect: bool) -> None: """Assert that the user is erased or not""" d = self.store.is_user_erased(user_id) -- cgit 1.4.1 From 37da9db082de686fd425058d29a605763b24cdfa Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 6 Jul 2021 13:54:23 +0100 Subject: 1.38.0rc1 --- CHANGES.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++ changelog.d/10114.misc | 1 - changelog.d/10166.doc | 1 - changelog.d/10205.feature | 1 - changelog.d/10213.misc | 1 - changelog.d/10214.feature | 1 - changelog.d/10223.bugfix | 1 - changelog.d/10225.feature | 1 - changelog.d/10237.misc | 1 - changelog.d/10239.misc | 1 - changelog.d/10242.doc | 1 - changelog.d/10243.feature | 1 - changelog.d/10252.bugfix | 1 - changelog.d/10253.misc | 1 - changelog.d/10256.misc | 1 - changelog.d/10258.doc | 1 - changelog.d/10261.feature | 1 - changelog.d/10263.feature | 1 - changelog.d/10264.bugfix | 1 - changelog.d/10267.bugfix | 1 - changelog.d/10268.misc | 1 - changelog.d/10279.bugfix | 1 - changelog.d/10282.bugfix | 1 - changelog.d/10284.feature | 1 - changelog.d/10286.bugfix | 1 - changelog.d/10288.doc | 1 - changelog.d/10290.feature | 1 - changelog.d/10291.bugfix | 1 - changelog.d/10292.misc | 1 - changelog.d/10302.doc | 1 - changelog.d/10303.bugfix | 1 - changelog.d/10314.bugfix | 1 - changelog.d/9450.feature | 1 - synapse/__init__.py | 2 +- 34 files changed, 50 insertions(+), 33 deletions(-) delete mode 100644 changelog.d/10114.misc delete mode 100644 changelog.d/10166.doc delete mode 100644 changelog.d/10205.feature delete mode 100644 changelog.d/10213.misc delete mode 100644 changelog.d/10214.feature delete mode 100644 changelog.d/10223.bugfix delete mode 100644 changelog.d/10225.feature delete mode 100644 changelog.d/10237.misc delete mode 100644 changelog.d/10239.misc delete mode 100644 changelog.d/10242.doc delete mode 100644 changelog.d/10243.feature delete mode 100644 changelog.d/10252.bugfix delete mode 100644 changelog.d/10253.misc delete mode 100644 changelog.d/10256.misc delete mode 100644 changelog.d/10258.doc delete mode 100644 changelog.d/10261.feature delete mode 100644 changelog.d/10263.feature delete mode 100644 changelog.d/10264.bugfix delete mode 100644 
changelog.d/10267.bugfix
delete mode 100644 changelog.d/10268.misc
delete mode 100644 changelog.d/10279.bugfix
delete mode 100644 changelog.d/10282.bugfix
delete mode 100644 changelog.d/10284.feature
delete mode 100644 changelog.d/10286.bugfix
delete mode 100644 changelog.d/10288.doc
delete mode 100644 changelog.d/10290.feature
delete mode 100644 changelog.d/10291.bugfix
delete mode 100644 changelog.d/10292.misc
delete mode 100644 changelog.d/10302.doc
delete mode 100644 changelog.d/10303.bugfix
delete mode 100644 changelog.d/10314.bugfix
delete mode 100644 changelog.d/9450.feature
diff --git a/CHANGES.md b/CHANGES.md
index a2fc423096..273d53690a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,52 @@
+Synapse 1.38.0rc1 (2021-07-06)
+==============================
+
+Features
+--------
+
+- Implement refresh tokens as specified by [MSC2918](https://github.com/matrix-org/matrix-doc/pull/2918). ([\#9450](https://github.com/matrix-org/synapse/issues/9450))
+- Add support for evicting cache entries based on last access time. ([\#10205](https://github.com/matrix-org/synapse/issues/10205))
+- Omit empty fields from the `/sync` response. Contributed by @deepbluev7. ([\#10214](https://github.com/matrix-org/synapse/issues/10214))
+- Improve validation on federation `send_{join,leave,knock}` endpoints. ([\#10225](https://github.com/matrix-org/synapse/issues/10225), [\#10243](https://github.com/matrix-org/synapse/issues/10243))
+- Add SSO `external_ids` to the Query User Account admin API. ([\#10261](https://github.com/matrix-org/synapse/issues/10261))
+- Mark events received over federation which fail a spam check as "soft-failed". ([\#10263](https://github.com/matrix-org/synapse/issues/10263))
+- Add metrics for new inbound federation staging area. ([\#10284](https://github.com/matrix-org/synapse/issues/10284))
+- Add script to print information about recently registered users. ([\#10290](https://github.com/matrix-org/synapse/issues/10290))
+
+
+Bugfixes
+--------
+
+- Fix a long-standing bug which meant that invite rejections and knocks were not sent out over federation in a timely manner. ([\#10223](https://github.com/matrix-org/synapse/issues/10223))
+- Fix a bug introduced in v1.26.0 where only users who have set profile information could be deactivated with erasure enabled. ([\#10252](https://github.com/matrix-org/synapse/issues/10252))
+- Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server. ([\#10264](https://github.com/matrix-org/synapse/issues/10264), [\#10267](https://github.com/matrix-org/synapse/issues/10267), [\#10282](https://github.com/matrix-org/synapse/issues/10282), [\#10286](https://github.com/matrix-org/synapse/issues/10286), [\#10291](https://github.com/matrix-org/synapse/issues/10291), [\#10314](https://github.com/matrix-org/synapse/issues/10314))
+- Fix the prometheus `synapse_federation_server_pdu_process_time` metric. Broke in v1.37.1. ([\#10279](https://github.com/matrix-org/synapse/issues/10279))
+- Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up. ([\#10303](https://github.com/matrix-org/synapse/issues/10303))
+
+
+Improved Documentation
+----------------------
+
+- Move the upgrade notes to [docs/upgrade.md](https://github.com/matrix-org/synapse/blob/develop/docs/upgrade.md) and convert them to markdown.
([\#10166](https://github.com/matrix-org/synapse/issues/10166)) +- Choose Welcome & Overview as the default page for synapse documentation website. ([\#10242](https://github.com/matrix-org/synapse/issues/10242)) +- Adjust the URL in the README.rst file to point to irc.libera.chat. ([\#10258](https://github.com/matrix-org/synapse/issues/10258)) +- Fix homeserver config option name in presence router documentation. ([\#10288](https://github.com/matrix-org/synapse/issues/10288)) +- Fix link pointing at the wrong section in the modules documentation page. ([\#10302](https://github.com/matrix-org/synapse/issues/10302)) + + +Internal Changes +---------------- + +- Drop Origin and Accept from the value of the Access-Control-Allow-Headers response header. ([\#10114](https://github.com/matrix-org/synapse/issues/10114)) +- Add type hints to the federation servlets. ([\#10213](https://github.com/matrix-org/synapse/issues/10213)) +- Improve the reliability of auto-joining remote rooms. ([\#10237](https://github.com/matrix-org/synapse/issues/10237)) +- Update the release script to use the semver terminology and determine the release branch based on the next version. ([\#10239](https://github.com/matrix-org/synapse/issues/10239)) +- Fix type hints for computing auth events. ([\#10253](https://github.com/matrix-org/synapse/issues/10253)) +- Improve the performance of the spaces summary endpoint by only recursing into spaces (and not rooms in general). ([\#10256](https://github.com/matrix-org/synapse/issues/10256)) +- Move event authentication methods from `Auth` to `EventAuthHandler`. ([\#10268](https://github.com/matrix-org/synapse/issues/10268)) +- Reenable a SyTest after it has been fixed. ([\#10292](https://github.com/matrix-org/synapse/issues/10292)) + + Synapse 1.38.0 (**UNRELEASED**) =============================== This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade.md#upgrading-to-v1380) for more information. diff --git a/changelog.d/10114.misc b/changelog.d/10114.misc deleted file mode 100644 index 808548f7c7..0000000000 --- a/changelog.d/10114.misc +++ /dev/null @@ -1 +0,0 @@ -Drop Origin and Accept from the value of the Access-Control-Allow-Headers response header. diff --git a/changelog.d/10166.doc b/changelog.d/10166.doc deleted file mode 100644 index 8d1710c132..0000000000 --- a/changelog.d/10166.doc +++ /dev/null @@ -1 +0,0 @@ -Move the upgrade notes to [docs/upgrade.md](https://github.com/matrix-org/synapse/blob/develop/docs/upgrade.md) and convert them to markdown. diff --git a/changelog.d/10205.feature b/changelog.d/10205.feature deleted file mode 100644 index db3fd22587..0000000000 --- a/changelog.d/10205.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for evicting cache entries based on last access time. diff --git a/changelog.d/10213.misc b/changelog.d/10213.misc deleted file mode 100644 index 9adb0fbd02..0000000000 --- a/changelog.d/10213.misc +++ /dev/null @@ -1 +0,0 @@ -Add type hints to the federation servlets. diff --git a/changelog.d/10214.feature b/changelog.d/10214.feature deleted file mode 100644 index a3818c9d25..0000000000 --- a/changelog.d/10214.feature +++ /dev/null @@ -1 +0,0 @@ -Omit empty fields from the `/sync` response. Contributed by @deepbluev7. 
\ No newline at end of file
diff --git a/changelog.d/10223.bugfix b/changelog.d/10223.bugfix
deleted file mode 100644
index 4e42f6b608..0000000000
--- a/changelog.d/10223.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug which meant that invite rejections and knocks were not sent out over federation in a timely manner.
diff --git a/changelog.d/10225.feature b/changelog.d/10225.feature
deleted file mode 100644
index d16f66ffe9..0000000000
--- a/changelog.d/10225.feature
+++ /dev/null
@@ -1 +0,0 @@
-Improve validation on federation `send_{join,leave,knock}` endpoints.
diff --git a/changelog.d/10237.misc b/changelog.d/10237.misc
deleted file mode 100644
index d76c119a41..0000000000
--- a/changelog.d/10237.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve the reliability of auto-joining remote rooms.
diff --git a/changelog.d/10239.misc b/changelog.d/10239.misc
deleted file mode 100644
index d05f1c4411..0000000000
--- a/changelog.d/10239.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update the release script to use the semver terminology and determine the release branch based on the next version.
diff --git a/changelog.d/10242.doc b/changelog.d/10242.doc
deleted file mode 100644
index 2241b28547..0000000000
--- a/changelog.d/10242.doc
+++ /dev/null
@@ -1 +0,0 @@
-Choose Welcome & Overview as the default page for synapse documentation website.
diff --git a/changelog.d/10243.feature b/changelog.d/10243.feature
deleted file mode 100644
index d16f66ffe9..0000000000
--- a/changelog.d/10243.feature
+++ /dev/null
@@ -1 +0,0 @@
-Improve validation on federation `send_{join,leave,knock}` endpoints.
diff --git a/changelog.d/10252.bugfix b/changelog.d/10252.bugfix
deleted file mode 100644
index c8ddd14528..0000000000
--- a/changelog.d/10252.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug introduced in v1.26.0 where only users who have set profile information could be deactivated with erasure enabled.
diff --git a/changelog.d/10253.misc b/changelog.d/10253.misc
deleted file mode 100644
index 44d9217245..0000000000
--- a/changelog.d/10253.misc
+++ /dev/null
@@ -1 +0,0 @@
-Fix type hints for computing auth events.
diff --git a/changelog.d/10256.misc b/changelog.d/10256.misc
deleted file mode 100644
index adef12fcb9..0000000000
--- a/changelog.d/10256.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve the performance of the spaces summary endpoint by only recursing into spaces (and not rooms in general).
diff --git a/changelog.d/10258.doc b/changelog.d/10258.doc
deleted file mode 100644
index 1549786c0c..0000000000
--- a/changelog.d/10258.doc
+++ /dev/null
@@ -1 +0,0 @@
-Adjust the URL in the README.rst file to point to irc.libera.chat.
diff --git a/changelog.d/10261.feature b/changelog.d/10261.feature
deleted file mode 100644
index cd55cecbd5..0000000000
--- a/changelog.d/10261.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add SSO `external_ids` to the Query User Account admin API.
diff --git a/changelog.d/10263.feature b/changelog.d/10263.feature
deleted file mode 100644
index 7b1d2fe60f..0000000000
--- a/changelog.d/10263.feature
+++ /dev/null
@@ -1 +0,0 @@
-Mark events received over federation which fail a spam check as "soft-failed".
diff --git a/changelog.d/10264.bugfix b/changelog.d/10264.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10264.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/changelog.d/10267.bugfix b/changelog.d/10267.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10267.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/changelog.d/10268.misc b/changelog.d/10268.misc
deleted file mode 100644
index 9e3f60c72f..0000000000
--- a/changelog.d/10268.misc
+++ /dev/null
@@ -1 +0,0 @@
-Move event authentication methods from `Auth` to `EventAuthHandler`.
diff --git a/changelog.d/10279.bugfix b/changelog.d/10279.bugfix
deleted file mode 100644
index ac8b64ead9..0000000000
--- a/changelog.d/10279.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix the prometheus `synapse_federation_server_pdu_process_time` metric. Broke in v1.37.1.
diff --git a/changelog.d/10282.bugfix b/changelog.d/10282.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10282.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/changelog.d/10284.feature b/changelog.d/10284.feature
deleted file mode 100644
index 379155e8cf..0000000000
--- a/changelog.d/10284.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add metrics for new inbound federation staging area.
diff --git a/changelog.d/10286.bugfix b/changelog.d/10286.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10286.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/changelog.d/10288.doc b/changelog.d/10288.doc
deleted file mode 100644
index 0739687b92..0000000000
--- a/changelog.d/10288.doc
+++ /dev/null
@@ -1 +0,0 @@
-Fix homeserver config option name in presence router documentation.
diff --git a/changelog.d/10290.feature b/changelog.d/10290.feature
deleted file mode 100644
index 4e4c2e24ef..0000000000
--- a/changelog.d/10290.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add script to print information about recently registered users.
diff --git a/changelog.d/10291.bugfix b/changelog.d/10291.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10291.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/changelog.d/10292.misc b/changelog.d/10292.misc
deleted file mode 100644
index 9e87d8682c..0000000000
--- a/changelog.d/10292.misc
+++ /dev/null
@@ -1 +0,0 @@
-Reenable a SyTest after it has been fixed.
diff --git a/changelog.d/10302.doc b/changelog.d/10302.doc
deleted file mode 100644
index 7386817de7..0000000000
--- a/changelog.d/10302.doc
+++ /dev/null
@@ -1 +0,0 @@
-Fix link pointing at the wrong section in the modules documentation page.
diff --git a/changelog.d/10303.bugfix b/changelog.d/10303.bugfix
deleted file mode 100644
index c0577c9f73..0000000000
--- a/changelog.d/10303.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up.
diff --git a/changelog.d/10314.bugfix b/changelog.d/10314.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10314.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
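
The issues referenced by the repeated entry above (#10264, #10267, #10282, #10286, #10291, #10314) all stem from one limit: the events table's stream ordering was kept in a signed 32-bit column, which tops out at 2^31 - 1, so event handling broke once the counter reached it; the `stream_ordering2` migration later in this series moves it to a wider column. A standalone sketch of the boundary, in plain Python rather than Synapse's storage code, with the 32-bit width inferred from the 2^31 figure:

# Largest value a signed 32-bit integer column can hold.
INT32_MAX = 2**31 - 1
assert INT32_MAX == 2_147_483_647


def next_stream_ordering(current: int, bits: int = 32) -> int:
    """Return current + 1, or raise once a signed `bits`-wide counter is full."""
    limit = 2 ** (bits - 1) - 1
    if current >= limit:
        raise OverflowError(f"stream ordering exhausted at {limit}")
    return current + 1


# The 32-bit counter runs out exactly at the 2^31 boundary...
try:
    next_stream_ordering(INT32_MAX)
except OverflowError as exc:
    print(exc)

# ...while a 64-bit counter has ample headroom.
print(next_stream_ordering(INT32_MAX, bits=64))
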
diff --git a/changelog.d/9450.feature b/changelog.d/9450.feature deleted file mode 100644 index 455936a41d..0000000000 --- a/changelog.d/9450.feature +++ /dev/null @@ -1 +0,0 @@ -Implement refresh tokens as specified by [MSC2918](https://github.com/matrix-org/matrix-doc/pull/2918). diff --git a/synapse/__init__.py b/synapse/__init__.py index 1bd03462ac..aa9a3269c0 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.37.1" +__version__ = "1.38.0rc1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when -- cgit 1.4.1 From 994722410a9810cd2e736bb96ae3d04c708c46e7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 6 Jul 2021 14:08:12 +0100 Subject: Small changelog tweaks --- CHANGES.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 273d53690a..c4551fdd69 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ Synapse 1.38.0rc1 (2021-07-06) ============================== +This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade.md#upgrading-to-v1380) for more information. + Features -------- @@ -37,20 +39,16 @@ Improved Documentation Internal Changes ---------------- -- Drop Origin and Accept from the value of the Access-Control-Allow-Headers response header. ([\#10114](https://github.com/matrix-org/synapse/issues/10114)) +- Drop `Origin` and `Accept` from the value of the `Access-Control-Allow-Headers` response header. ([\#10114](https://github.com/matrix-org/synapse/issues/10114)) - Add type hints to the federation servlets. ([\#10213](https://github.com/matrix-org/synapse/issues/10213)) - Improve the reliability of auto-joining remote rooms. ([\#10237](https://github.com/matrix-org/synapse/issues/10237)) - Update the release script to use the semver terminology and determine the release branch based on the next version. ([\#10239](https://github.com/matrix-org/synapse/issues/10239)) - Fix type hints for computing auth events. ([\#10253](https://github.com/matrix-org/synapse/issues/10253)) - Improve the performance of the spaces summary endpoint by only recursing into spaces (and not rooms in general). ([\#10256](https://github.com/matrix-org/synapse/issues/10256)) - Move event authentication methods from `Auth` to `EventAuthHandler`. ([\#10268](https://github.com/matrix-org/synapse/issues/10268)) -- Reenable a SyTest after it has been fixed. ([\#10292](https://github.com/matrix-org/synapse/issues/10292)) +- Re-enable a SyTest after it has been fixed. ([\#10292](https://github.com/matrix-org/synapse/issues/10292)) -Synapse 1.38.0 (**UNRELEASED**) -=============================== -This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade.md#upgrading-to-v1380) for more information. 
- Synapse 1.37.1 (2021-06-30) =========================== -- cgit 1.4.1 From 47e28b4031c7c5e2c87824c2b4873492b996d02e Mon Sep 17 00:00:00 2001 From: Dagfinn Ilmari Mannsåker Date: Tue, 6 Jul 2021 14:31:13 +0100 Subject: Ignore EDUs for rooms we're not in (#10317) --- changelog.d/10317.bugfix | 1 + synapse/handlers/receipts.py | 15 +++++++++++++++ synapse/handlers/typing.py | 14 ++++++++++++++ tests/handlers/test_typing.py | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 changelog.d/10317.bugfix diff --git a/changelog.d/10317.bugfix b/changelog.d/10317.bugfix new file mode 100644 index 0000000000..826c269eff --- /dev/null +++ b/changelog.d/10317.bugfix @@ -0,0 +1 @@ +Fix purging rooms that other homeservers are still sending events for. Contributed by @ilmari. diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f782d9db32..0059ad0f56 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -30,6 +30,8 @@ class ReceiptsHandler(BaseHandler): self.server_name = hs.config.server_name self.store = hs.get_datastore() + self.event_auth_handler = hs.get_event_auth_handler() + self.hs = hs # We only need to poke the federation sender explicitly if its on the @@ -59,6 +61,19 @@ class ReceiptsHandler(BaseHandler): """Called when we receive an EDU of type m.receipt from a remote HS.""" receipts = [] for room_id, room_values in content.items(): + # If we're not in the room just ditch the event entirely. This is + # probably an old server that has come back and thinks we're still in + # the room (or we've been rejoined to the room by a state reset). + is_in_room = await self.event_auth_handler.check_host_in_room( + room_id, self.server_name + ) + if not is_in_room: + logger.info( + "Ignoring receipt from %s as we're not in the room", + origin, + ) + continue + for receipt_type, users in room_values.items(): for user_id, user_values in users.items(): if get_domain_from_id(user_id) != origin: diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index e22393adc4..c0a8364755 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -208,6 +208,7 @@ class TypingWriterHandler(FollowerTypingHandler): self.auth = hs.get_auth() self.notifier = hs.get_notifier() + self.event_auth_handler = hs.get_event_auth_handler() self.hs = hs @@ -326,6 +327,19 @@ class TypingWriterHandler(FollowerTypingHandler): room_id = content["room_id"] user_id = content["user_id"] + # If we're not in the room just ditch the event entirely. This is + # probably an old server that has come back and thinks we're still in + # the room (or we've been rejoined to the room by a state reset). 
+ is_in_room = await self.event_auth_handler.check_host_in_room( + room_id, self.server_name + ) + if not is_in_room: + logger.info( + "Ignoring typing update from %s as we're not in the room", + origin, + ) + return + member = RoomMember(user_id=user_id, room_id=room_id) # Check that the string is a valid user id diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index f58afbc244..fa3cff598e 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -38,6 +38,9 @@ U_ONION = UserID.from_string("@onion:farm") # Test room id ROOM_ID = "a-room" +# Room we're not in +OTHER_ROOM_ID = "another-room" + def _expect_edu_transaction(edu_type, content, origin="test"): return { @@ -115,6 +118,11 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): hs.get_auth().check_user_in_room = check_user_in_room + async def check_host_in_room(room_id, server_name): + return room_id == ROOM_ID + + hs.get_event_auth_handler().check_host_in_room = check_host_in_room + def get_joined_hosts_for_room(room_id): return {member.domain for member in self.room_members} @@ -244,6 +252,35 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ], ) + def test_started_typing_remote_recv_not_in_room(self): + self.room_members = [U_APPLE, U_ONION] + + self.assertEquals(self.event_source.get_current_key(), 0) + + channel = self.make_request( + "PUT", + "/_matrix/federation/v1/send/1000000", + _make_edu_transaction_json( + "m.typing", + content={ + "room_id": OTHER_ROOM_ID, + "user_id": U_ONION.to_string(), + "typing": True, + }, + ), + federation_auth_origin=b"farm", + ) + self.assertEqual(channel.code, 200) + + self.on_new_event.assert_not_called() + + self.assertEquals(self.event_source.get_current_key(), 0) + events = self.get_success( + self.event_source.get_new_events(room_ids=[OTHER_ROOM_ID], from_key=0) + ) + self.assertEquals(events[0], []) + self.assertEquals(events[1], 0) + @override_config({"send_federation": True}) def test_stopped_typing(self): self.room_members = [U_APPLE, U_BANANA, U_ONION] -- cgit 1.4.1 From 7c823789921ac34f1fee670be7ef7f6c8266832b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 7 Jul 2021 10:43:54 +0100 Subject: build the docs for master (#10323) --- .github/workflows/docs.yaml | 59 +++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 23b8d7f909..22a2d4f6bf 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -7,6 +7,8 @@ on: - develop # For documentation specific to a release - 'release-v*' + # stable docs + - master workflow_dispatch: @@ -25,40 +27,35 @@ jobs: - name: Build the documentation run: mdbook build - # Deploy to the latest documentation directories - - name: Deploy latest documentation - uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - keep_files: true - publish_dir: ./book - destination_dir: ./develop - - - name: Get the current Synapse version + # Figure out the target directory. + # + # The target directory depends on the name of the branch + # + - name: Get the target directory name id: vars - # The $GITHUB_REF value for a branch looks like `refs/heads/release-v1.2`. 
We do some
-        # shell magic to remove the "refs/heads/release-v" bit from this, to end up with "1.2",
-        # our major/minor version number, and set this to a var called `branch-version`.
-        #
-        # We then use some python to get Synapse's full version string, which may look
-        # like "1.2.3rc4". We set this to a var called `synapse-version`. We use this
-        # to determine if this release is still an RC, and if so block deployment.
        run: |
-          echo ::set-output name=branch-version::${GITHUB_REF#refs/heads/release-v}
-          echo ::set-output name=synapse-version::`python3 -c 'import synapse; print(synapse.__version__)'`
-
-      # Deploy to the version-specific directory
-      - name: Deploy release-specific documentation
-        # We only carry out this step if we're running on a release branch,
-        # and the current Synapse version does not have "rc" in the name.
-        #
-        # The result is that only full releases are deployed, but can be
-        # updated if the release branch gets retroactive fixes.
-        if: ${{ startsWith( github.ref, 'refs/heads/release-v' ) && !contains( steps.vars.outputs.synapse-version, 'rc') }}
-        uses: peaceiris/actions-gh-pages@v3
+        run: |
+          # first strip the 'refs/heads/' prefix with some shell foo
+          branch="${GITHUB_REF#refs/heads/}"
+
+          case $branch in
+              release-*)
+                  # strip 'release-' from the name for release branches.
+                  branch="${branch#release-}"
+                  ;;
+              master)
+                  # deploy to "latest" for the master branch.
+                  branch="latest"
+                  ;;
+          esac
+
+          # finally, set the 'branch-version' var.
+          echo "::set-output name=branch-version::$branch"
+
+      # Deploy to the target directory.
+      - name: Deploy to gh pages
+        uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          keep_files: true
          publish_dir: ./book
-          # The resulting documentation will end up in a directory named `vX.Y`.
-          destination_dir: ./v${{ steps.vars.outputs.branch-version }}
+          destination_dir: ./${{ steps.vars.outputs.branch-version }}
-- cgit 1.4.1


From 9ad84558951dd970dc2a362c923552141a42a5f3 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Wed, 7 Jul 2021 11:56:17 +0200
Subject: ANALYZE new stream ordering column (#10326)

Fixes #10325
---
 changelog.d/10326.bugfix                            |  1 +
 synapse/storage/databases/main/events_bg_updates.py | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 changelog.d/10326.bugfix

diff --git a/changelog.d/10326.bugfix b/changelog.d/10326.bugfix
new file mode 100644
index 0000000000..7ebda7cdc2
--- /dev/null
+++ b/changelog.d/10326.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 1c95c66648..29f33bac55 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -1146,6 +1146,16 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
            logger.info("completing stream_ordering migration: %s", sql)
            txn.execute(sql)

+        # ANALYZE the new column to build stats on it, to encourage PostgreSQL to use the
+        # indexes on it.
+        # We need to pass `execute` a dummy function to handle the txn's result, otherwise
+        # it tries to call fetchall() on it and fails because there's no result to fetch.
+        await self.db_pool.execute(
+            "background_analyze_new_stream_ordering_column",
+            lambda txn: None,
+            "ANALYZE events(stream_ordering2)",
+        )
+
        await self.db_pool.runInteraction(
            "_background_replace_stream_ordering_column", process
        )
-- cgit 1.4.1


From 24796f80ba3aecf449bc9921b259e3d98a049920 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Wed, 7 Jul 2021 11:21:58 +0100
Subject: Merge latest fix into the changelog

---
 CHANGES.md               | 2 +-
 changelog.d/10326.bugfix | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)
 delete mode 100644 changelog.d/10326.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index c4551fdd69..1d6dffec6e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -21,7 +21,7 @@ Bugfixes

- Fix a long-standing bug which meant that invite rejections and knocks were not sent out over federation in a timely manner. ([\#10223](https://github.com/matrix-org/synapse/issues/10223))
- Fix a bug introduced in v1.26.0 where only users who have set profile information could be deactivated with erasure enabled. ([\#10252](https://github.com/matrix-org/synapse/issues/10252))
-- Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server. ([\#10264](https://github.com/matrix-org/synapse/issues/10264), [\#10267](https://github.com/matrix-org/synapse/issues/10267), [\#10282](https://github.com/matrix-org/synapse/issues/10282), [\#10286](https://github.com/matrix-org/synapse/issues/10286), [\#10291](https://github.com/matrix-org/synapse/issues/10291), [\#10314](https://github.com/matrix-org/synapse/issues/10314))
+- Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server. ([\#10264](https://github.com/matrix-org/synapse/issues/10264), [\#10267](https://github.com/matrix-org/synapse/issues/10267), [\#10282](https://github.com/matrix-org/synapse/issues/10282), [\#10286](https://github.com/matrix-org/synapse/issues/10286), [\#10291](https://github.com/matrix-org/synapse/issues/10291), [\#10314](https://github.com/matrix-org/synapse/issues/10314), [\#10326](https://github.com/matrix-org/synapse/issues/10326))
- Fix the prometheus `synapse_federation_server_pdu_process_time` metric. Broke in v1.37.1. ([\#10279](https://github.com/matrix-org/synapse/issues/10279))
- Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up. ([\#10303](https://github.com/matrix-org/synapse/issues/10303))

diff --git a/changelog.d/10326.bugfix b/changelog.d/10326.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10326.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2^31 events were handled by the server.
-- cgit 1.4.1


From 7cb51680875170e22c72fc8b0d2fb3e3e09f4c67 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Wed, 7 Jul 2021 11:32:20 +0100
Subject: Fix broken link

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 1d6dffec6e..2b0179edc3 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,7 +1,7 @@
 Synapse 1.38.0rc1 (2021-07-06)
 ==============================

-This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade.md#upgrading-to-v1380) for more information.
+This release includes a database schema update which could result in elevated disk usage.
See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade#upgrading-to-v1380) for more information. Features -------- -- cgit 1.4.1 From 2d044667cff1b6aeb1d791c6dede95cf7f5a8f2b Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 7 Jul 2021 13:18:36 +0200 Subject: Simplify structure of room admin API docs (#10313) --- changelog.d/10313.doc | 1 + docs/admin_api/rooms.md | 69 +++++++++++++++++-------------------------------- 2 files changed, 25 insertions(+), 45 deletions(-) create mode 100644 changelog.d/10313.doc diff --git a/changelog.d/10313.doc b/changelog.d/10313.doc new file mode 100644 index 0000000000..44086e3d9d --- /dev/null +++ b/changelog.d/10313.doc @@ -0,0 +1 @@ +Simplify structure of room admin API. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index bb7828a525..48777dd231 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -1,13 +1,9 @@ # Contents - [List Room API](#list-room-api) - * [Parameters](#parameters) - * [Usage](#usage) - [Room Details API](#room-details-api) - [Room Members API](#room-members-api) - [Room State API](#room-state-api) - [Delete Room API](#delete-room-api) - * [Parameters](#parameters-1) - * [Response](#response) * [Undoing room shutdowns](#undoing-room-shutdowns) - [Make Room Admin API](#make-room-admin-api) - [Forward Extremities Admin API](#forward-extremities-admin-api) @@ -19,7 +15,7 @@ The List Room admin API allows server admins to get a list of rooms on their server. There are various parameters available that allow for filtering and sorting the returned list. This API supports pagination. -## Parameters +**Parameters** The following query parameters are available: @@ -46,6 +42,8 @@ The following query parameters are available: * `search_term` - Filter rooms by their room name. Search term can be contained in any part of the room name. Defaults to no filtering. +**Response** + The following fields are possible in the JSON response body: * `rooms` - An array of objects, each containing information about a room. @@ -79,17 +77,15 @@ The following fields are possible in the JSON response body: Use `prev_batch` for the `from` value in the next request to get the "previous page" of results. -## Usage +The API is: A standard request with no filtering: ``` GET /_synapse/admin/v1/rooms - -{} ``` -Response: +A response body like the following is returned: ```jsonc { @@ -137,11 +133,9 @@ Filtering by room name: ``` GET /_synapse/admin/v1/rooms?search_term=TWIM - -{} ``` -Response: +A response body like the following is returned: ```json { @@ -172,11 +166,9 @@ Paginating through a list of rooms: ``` GET /_synapse/admin/v1/rooms?order_by=size - -{} ``` -Response: +A response body like the following is returned: ```jsonc { @@ -228,11 +220,9 @@ parameter to the value of `next_token`. ``` GET /_synapse/admin/v1/rooms?order_by=size&from=100 - -{} ``` -Response: +A response body like the following is returned: ```jsonc { @@ -304,17 +294,13 @@ The following fields are possible in the JSON response body: * `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"]. * `state_events` - Total number of state_events of a room. Complexity of the room. 
-## Usage
-
-A standard request:
+The API is:

```
GET /_synapse/admin/v1/rooms/<room_id>
-
-{}
```

-Response:
+A response body like the following is returned:

```json
{
@@ -347,17 +333,13 @@ The response includes the following fields:
 * `members` - A list of all the members that are present in the room, represented by their ids.
 * `total` - Total number of members in the room.

-## Usage
-
-A standard request:
+The API is:

```
GET /_synapse/admin/v1/rooms/<room_id>/members
-
-{}
```

-Response:
+A response body like the following is returned:

```json
{
@@ -378,17 +360,13 @@ The response includes the following fields:

 * `state` - The current state of the room at the time of request.

-## Usage
-
-A standard request:
+The API is:

```
GET /_synapse/admin/v1/rooms/<room_id>/state
-
-{}
```

-Response:
+A response body like the following is returned:

```json
{
@@ -432,6 +410,7 @@ DELETE /_synapse/admin/v1/rooms/<room_id>
```

with a body of:
+
```json
{
    "new_room_user_id": "@someuser:example.com",
@@ -461,7 +440,7 @@ A response body like the following is returned:
}
```

-## Parameters
+**Parameters**

The following parameters should be set in the URL:

@@ -491,7 +470,7 @@ The following JSON body parameters are available:

The JSON body must not be empty. The body must be at least `{}`.

-## Response
+**Response**

The following fields are returned in the JSON response body:

@@ -548,10 +527,10 @@ By default the server admin (the caller) is granted power, but another user can
optionally be specified, e.g.:

```
-    POST /_synapse/admin/v1/rooms/<room_id_or_alias>/make_room_admin
-    {
-        "user_id": "@foo:example.com"
-    }
+POST /_synapse/admin/v1/rooms/<room_id_or_alias>/make_room_admin
+{
+    "user_id": "@foo:example.com"
+}
```

# Forward Extremities Admin API

@@ -565,7 +544,7 @@ extremities accumulate in a room, performance can become degraded.
For details,

To check the status of forward extremities for a room:

```
-    GET /_synapse/admin/v1/rooms/<room_id>/forward_extremities
+GET /_synapse/admin/v1/rooms/<room_id>/forward_extremities
```

A response as follows will be returned:

```json
{
  "count": 1,
  "results": [
    {
      "event_id": "$M5SP266vsnxctfwFgFLNceaCo3ujhRtg_NiiHabcdefgh",
      "state_group": 439,
      "received_ts": 1611263016761
    }
  ]
-}
+}
```

## Deleting forward extremities

@@ -594,7 +573,7 @@ If a room has lots of forward extremities, the extra can be deleted as follows:

```
-    DELETE /_synapse/admin/v1/rooms/<room_id>/forward_extremities
+DELETE /_synapse/admin/v1/rooms/<room_id>/forward_extremities
```

A response as follows will be returned, indicating the amount of forward extremities
-- cgit 1.4.1


From 56fd5fa8e1cacdba89ff1c9a9c18d0d6f0cb0f74 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Wed, 7 Jul 2021 13:35:45 +0200
Subject: Update links to documentation in sample config (#10287)

Signed-off-by: Dirk Klimpel <dirk@klimpel.org>
---
 changelog.d/10287.doc                     |  1 +
 docs/sample_config.yaml                   | 44 ++++++++++++++++---------------
 docs/sample_log_config.yaml               |  2 +-
 synapse/config/consent.py                 |  2 +-
 synapse/config/database.py                |  3 ++-
 synapse/config/jwt.py                     |  2 +-
 synapse/config/logger.py                  |  2 +-
 synapse/config/modules.py                 |  2 +-
 synapse/config/oidc.py                    |  4 +--
 synapse/config/password_auth_providers.py |  2 +-
 synapse/config/repository.py              |  2 +-
 synapse/config/server.py                  | 23 ++++++++--------
 synapse/config/spam_checker.py            |  2 +-
 synapse/config/stats.py                   |  2 +-
 synapse/config/tracer.py                  |  2 +-
 synapse/config/user_directory.py          |  2 +-
 16 files changed, 51 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/10287.doc

diff --git a/changelog.d/10287.doc b/changelog.d/10287.doc
new file mode 100644
index 0000000000..d62afc1e15
--- /dev/null
+++ b/changelog.d/10287.doc
@@ -0,0 +1 @@
+Update links to documentation in sample config. Contributed by @dklimpel.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index c04aca1f42..71463168e3 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -36,7 +36,7 @@
 # Server admins can expand Synapse's functionality with external modules.
 #
-# See https://matrix-org.github.io/synapse/develop/modules.html for more
+# See https://matrix-org.github.io/synapse/latest/modules.html for more
 # documentation on how to configure or create custom modules for Synapse.
 #
 modules:
@@ -58,7 +58,7 @@ modules:
 # In most cases you should avoid using a matrix specific subdomain such as
 # matrix.example.com or synapse.example.com as the server_name for the same
 # reasons you wouldn't use user@email.example.com as your email address.
-# See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md
+# See https://matrix-org.github.io/synapse/latest/delegate.html
 # for information on how to host Synapse on a subdomain while preserving
 # a clean server_name.
 #
@@ -253,9 +253,9 @@ presence:
 #   'all local interfaces'.
 #
 #   type: the type of listener. Normally 'http', but other valid options are:
-#       'manhole' (see docs/manhole.md),
-#       'metrics' (see docs/metrics-howto.md),
-#       'replication' (see docs/workers.md).
+#       'manhole' (see https://matrix-org.github.io/synapse/latest/manhole.html),
+#       'metrics' (see https://matrix-org.github.io/synapse/latest/metrics-howto.html),
+#       'replication' (see https://matrix-org.github.io/synapse/latest/workers.html).
 #
 #   tls: set to true to enable TLS for this listener. Will use the TLS
 #     key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -280,8 +280,8 @@ presence: # client: the client-server API (/_matrix/client), and the synapse admin # API (/_synapse/admin). Also implies 'media' and 'static'. # -# consent: user consent forms (/_matrix/consent). See -# docs/consent_tracking.md. +# consent: user consent forms (/_matrix/consent). +# See https://matrix-org.github.io/synapse/latest/consent_tracking.html. # # federation: the server-server API (/_matrix/federation). Also implies # 'media', 'keys', 'openid' @@ -290,12 +290,13 @@ presence: # # media: the media API (/_matrix/media). # -# metrics: the metrics interface. See docs/metrics-howto.md. +# metrics: the metrics interface. +# See https://matrix-org.github.io/synapse/latest/metrics-howto.html. # # openid: OpenID authentication. # -# replication: the HTTP replication API (/_synapse/replication). See -# docs/workers.md. +# replication: the HTTP replication API (/_synapse/replication). +# See https://matrix-org.github.io/synapse/latest/workers.html. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -319,7 +320,7 @@ listeners: # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. + # https://matrix-org.github.io/synapse/latest/reverse_proxy.html. # - port: 8008 tls: false @@ -747,7 +748,8 @@ caches: # cp_min: 5 # cp_max: 10 # -# For more information on using Synapse with Postgres, see `docs/postgres.md`. +# For more information on using Synapse with Postgres, +# see https://matrix-org.github.io/synapse/latest/postgres.html. # database: name: sqlite3 @@ -900,7 +902,7 @@ media_store_path: "DATADIR/media_store" # # If you are using a reverse proxy you may also need to set this value in # your reverse proxy's config. Notably Nginx has a small max body size by default. -# See https://matrix-org.github.io/synapse/develop/reverse_proxy.html. +# See https://matrix-org.github.io/synapse/latest/reverse_proxy.html. # #max_upload_size: 50M @@ -1840,7 +1842,7 @@ saml2_config: # # module: The class name of a custom mapping module. Default is # 'synapse.handlers.oidc.JinjaOidcMappingProvider'. -# See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers +# See https://matrix-org.github.io/synapse/latest/sso_mapping_providers.html#openid-mapping-providers # for information on implementing a custom mapping provider. # # config: Configuration for the mapping provider module. This section will @@ -1891,7 +1893,7 @@ saml2_config: # - attribute: groups # value: "admin" # -# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md +# See https://matrix-org.github.io/synapse/latest/openid.html # for information on how to configure these options. # # For backwards compatibility, it is also possible to configure a single OIDC @@ -2169,7 +2171,7 @@ sso: # Note that this is a non-standard login type and client support is # expected to be non-existent. # -# See https://github.com/matrix-org/synapse/blob/master/docs/jwt.md. +# See https://matrix-org.github.io/synapse/latest/jwt.html. # #jwt_config: # Uncomment the following to enable authorization using JSON web @@ -2469,7 +2471,7 @@ email: # ex. LDAP, external tokens, etc. 
# # For more information and known implementations, please see -# https://github.com/matrix-org/synapse/blob/master/docs/password_auth_providers.md +# https://matrix-org.github.io/synapse/latest/password_auth_providers.html # # Note: instances wishing to use SAML or CAS authentication should # instead use the `saml2_config` or `cas_config` options, @@ -2571,7 +2573,7 @@ user_directory: # # If you set it true, you'll have to rebuild the user_directory search # indexes, see: - # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. @@ -2591,7 +2593,7 @@ user_directory: # User Consent configuration # # for detailed instructions, see -# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md +# https://matrix-org.github.io/synapse/latest/consent_tracking.html # # Parts of this section are required if enabling the 'consent' resource under # 'listeners', in particular 'template_dir' and 'version'. @@ -2641,7 +2643,7 @@ user_directory: # Settings for local room and user statistics collection. See -# docs/room_and_user_statistics.md. +# https://matrix-org.github.io/synapse/latest/room_and_user_statistics.html. # stats: # Uncomment the following to disable room and user statistics. Note that doing @@ -2768,7 +2770,7 @@ opentracing: #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # See docs/opentracing.rst. + # See https://matrix-org.github.io/synapse/latest/opentracing.html. # # This is a list of regexes which are matched against the server_name of the # homeserver. diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml index ff3c747180..669e600081 100644 --- a/docs/sample_log_config.yaml +++ b/docs/sample_log_config.yaml @@ -7,7 +7,7 @@ # be ingested by ELK stacks. See [2] for details. # # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema -# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md +# [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html version: 1 diff --git a/synapse/config/consent.py b/synapse/config/consent.py index 30d07cc219..b05a9bd97f 100644 --- a/synapse/config/consent.py +++ b/synapse/config/consent.py @@ -22,7 +22,7 @@ DEFAULT_CONFIG = """\ # User Consent configuration # # for detailed instructions, see -# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md +# https://matrix-org.github.io/synapse/latest/consent_tracking.html # # Parts of this section are required if enabling the 'consent' resource under # 'listeners', in particular 'template_dir' and 'version'. diff --git a/synapse/config/database.py b/synapse/config/database.py index c76ef1e1de..3d7d92f615 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -62,7 +62,8 @@ DEFAULT_CONFIG = """\ # cp_min: 5 # cp_max: 10 # -# For more information on using Synapse with Postgres, see `docs/postgres.md`. +# For more information on using Synapse with Postgres, +# see https://matrix-org.github.io/synapse/latest/postgres.html. 
# database: name: sqlite3 diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 9e07e73008..9d295f5856 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -64,7 +64,7 @@ class JWTConfig(Config): # Note that this is a non-standard login type and client support is # expected to be non-existent. # - # See https://github.com/matrix-org/synapse/blob/master/docs/jwt.md. + # See https://matrix-org.github.io/synapse/latest/jwt.html. # #jwt_config: # Uncomment the following to enable authorization using JSON web diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 91d9bcf32e..ad4e6e61c3 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -49,7 +49,7 @@ DEFAULT_LOG_CONFIG = Template( # be ingested by ELK stacks. See [2] for details. # # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema -# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md +# [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html version: 1 diff --git a/synapse/config/modules.py b/synapse/config/modules.py index 3209e1c492..ae0821e5a5 100644 --- a/synapse/config/modules.py +++ b/synapse/config/modules.py @@ -37,7 +37,7 @@ class ModulesConfig(Config): # Server admins can expand Synapse's functionality with external modules. # - # See https://matrix-org.github.io/synapse/develop/modules.html for more + # See https://matrix-org.github.io/synapse/latest/modules.html for more # documentation on how to configure or create custom modules for Synapse. # modules: diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index ea0abf5aa2..942e2672a9 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -166,7 +166,7 @@ class OIDCConfig(Config): # # module: The class name of a custom mapping module. Default is # {mapping_provider!r}. - # See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers + # See https://matrix-org.github.io/synapse/latest/sso_mapping_providers.html#openid-mapping-providers # for information on implementing a custom mapping provider. # # config: Configuration for the mapping provider module. This section will @@ -217,7 +217,7 @@ class OIDCConfig(Config): # - attribute: groups # value: "admin" # - # See https://github.com/matrix-org/synapse/blob/master/docs/openid.md + # See https://matrix-org.github.io/synapse/latest/openid.html # for information on how to configure these options. # # For backwards compatibility, it is also possible to configure a single OIDC diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 1cf69734bb..fd90b79772 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -57,7 +57,7 @@ class PasswordAuthProviderConfig(Config): # ex. LDAP, external tokens, etc. 
# # For more information and known implementations, please see - # https://github.com/matrix-org/synapse/blob/master/docs/password_auth_providers.md + # https://matrix-org.github.io/synapse/latest/password_auth_providers.html # # Note: instances wishing to use SAML or CAS authentication should # instead use the `saml2_config` or `cas_config` options, diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2f77d6703d..a7a82742ac 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -250,7 +250,7 @@ class ContentRepositoryConfig(Config): # # If you are using a reverse proxy you may also need to set this value in # your reverse proxy's config. Notably Nginx has a small max body size by default. - # See https://matrix-org.github.io/synapse/develop/reverse_proxy.html. + # See https://matrix-org.github.io/synapse/latest/reverse_proxy.html. # #max_upload_size: 50M diff --git a/synapse/config/server.py b/synapse/config/server.py index 0833a5f7bc..6bff715230 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -153,7 +153,7 @@ ROOM_COMPLEXITY_TOO_GREAT = ( METRICS_PORT_WARNING = """\ The metrics_port configuration option is deprecated in Synapse 0.31 in favour of a listener. Please see -https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md +https://matrix-org.github.io/synapse/latest/metrics-howto.html on how to configure the new listener. --------------------------------------------------------------------------------""" @@ -811,7 +811,7 @@ class ServerConfig(Config): # In most cases you should avoid using a matrix specific subdomain such as # matrix.example.com or synapse.example.com as the server_name for the same # reasons you wouldn't use user@email.example.com as your email address. - # See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md + # See https://matrix-org.github.io/synapse/latest/delegate.html # for information on how to host Synapse on a subdomain while preserving # a clean server_name. # @@ -988,9 +988,9 @@ class ServerConfig(Config): # 'all local interfaces'. # # type: the type of listener. Normally 'http', but other valid options are: - # 'manhole' (see docs/manhole.md), - # 'metrics' (see docs/metrics-howto.md), - # 'replication' (see docs/workers.md). + # 'manhole' (see https://matrix-org.github.io/synapse/latest/manhole.html), + # 'metrics' (see https://matrix-org.github.io/synapse/latest/metrics-howto.html), + # 'replication' (see https://matrix-org.github.io/synapse/latest/workers.html). # # tls: set to true to enable TLS for this listener. Will use the TLS # key/cert specified in tls_private_key_path / tls_certificate_path. @@ -1015,8 +1015,8 @@ class ServerConfig(Config): # client: the client-server API (/_matrix/client), and the synapse admin # API (/_synapse/admin). Also implies 'media' and 'static'. # - # consent: user consent forms (/_matrix/consent). See - # docs/consent_tracking.md. + # consent: user consent forms (/_matrix/consent). + # See https://matrix-org.github.io/synapse/latest/consent_tracking.html. # # federation: the server-server API (/_matrix/federation). Also implies # 'media', 'keys', 'openid' @@ -1025,12 +1025,13 @@ class ServerConfig(Config): # # media: the media API (/_matrix/media). # - # metrics: the metrics interface. See docs/metrics-howto.md. + # metrics: the metrics interface. + # See https://matrix-org.github.io/synapse/latest/metrics-howto.html. # # openid: OpenID authentication. 
# - # replication: the HTTP replication API (/_synapse/replication). See - # docs/workers.md. + # replication: the HTTP replication API (/_synapse/replication). + # See https://matrix-org.github.io/synapse/latest/workers.html. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -1050,7 +1051,7 @@ class ServerConfig(Config): # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. + # https://matrix-org.github.io/synapse/latest/reverse_proxy.html. # %(unsecure_http_bindings)s diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index d0311d6468..cb7716c837 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -26,7 +26,7 @@ LEGACY_SPAM_CHECKER_WARNING = """ This server is using a spam checker module that is implementing the deprecated spam checker interface. Please check with the module's maintainer to see if a new version supporting Synapse's generic modules system is available. -For more information, please see https://matrix-org.github.io/synapse/develop/modules.html +For more information, please see https://matrix-org.github.io/synapse/latest/modules.html ---------------------------------------------------------------------------------------""" diff --git a/synapse/config/stats.py b/synapse/config/stats.py index 3d44b51201..78f61fe9da 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -51,7 +51,7 @@ class StatsConfig(Config): def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ # Settings for local room and user statistics collection. See - # docs/room_and_user_statistics.md. + # https://matrix-org.github.io/synapse/latest/room_and_user_statistics.html. # stats: # Uncomment the following to disable room and user statistics. Note that doing diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index d0ea17261f..21b9a88353 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -81,7 +81,7 @@ class TracerConfig(Config): #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # See docs/opentracing.rst. + # See https://matrix-org.github.io/synapse/latest/opentracing.html. # # This is a list of regexes which are matched against the server_name of the # homeserver. diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 4cbf79eeed..b10df8a232 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -50,7 +50,7 @@ class UserDirectoryConfig(Config): # # If you set it true, you'll have to rebuild the user_directory search # indexes, see: - # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. 
-- cgit 1.4.1 From 189652b2fea038340e4e1420081c6ddd8093da0e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 7 Jul 2021 12:54:57 +0100 Subject: Fix a broken link in the admin api docs (#10322) * Fix a broken link in the admin api docs * Rename 10321.doc to 10321.docs * Rename 10321.docs to 10322.doc --- changelog.d/10322.doc | 1 + docs/admin_api/media_admin_api.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10322.doc diff --git a/changelog.d/10322.doc b/changelog.d/10322.doc new file mode 100644 index 0000000000..db604cf2aa --- /dev/null +++ b/changelog.d/10322.doc @@ -0,0 +1 @@ +Fix a broken link in the admin api docs. diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index b033fc03ef..61bed1e0d5 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -47,7 +47,7 @@ The API returns a JSON body like the following: ## List all media uploaded by a user Listing all media that has been uploaded by a local user can be achieved through -the use of the [List media of a user](user_admin_api.rst#list-media-of-a-user) +the use of the [List media of a user](user_admin_api.md#list-media-of-a-user) Admin API. # Quarantine media @@ -257,7 +257,7 @@ URL Parameters * `server_name`: string - The name of your local server (e.g `matrix.org`). * `before_ts`: string representing a positive integer - Unix timestamp in ms. Files that were last used before this timestamp will be deleted. It is the timestamp of -last access and not the timestamp creation. +last access and not the timestamp creation. * `size_gt`: Optional - string representing a positive integer - Size of the media in bytes. Files that are larger will be deleted. Defaults to `0`. * `keep_profiles`: Optional - string representing a boolean - Switch to also delete files -- cgit 1.4.1 From 225be7778727682e250a02acf975217f8eca9ed7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 8 Jul 2021 13:00:05 +0200 Subject: Rebuild event auth when rebuilding an event after a call to a `ThirdPartyEventRules` module (#10316) Because modules might send extra state events when processing an event (e.g. matrix-org/synapse-dinsic#100), and in some cases these extra events might get dropped if we don't recalculate the initial event's auth. --- changelog.d/10316.misc | 1 + synapse/handlers/message.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/10316.misc diff --git a/changelog.d/10316.misc b/changelog.d/10316.misc new file mode 100644 index 0000000000..1fd0810fde --- /dev/null +++ b/changelog.d/10316.misc @@ -0,0 +1 @@ +Rebuild event context and auth when processing specific results from `ThirdPartyEventRules` modules. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 66e40a915d..b960e18c4c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1594,11 +1594,13 @@ class EventCreationHandler: for k, v in original_event.internal_metadata.get_dict().items(): setattr(builder.internal_metadata, k, v) - # the event type hasn't changed, so there's no point in re-calculating the - # auth events. + # modules can send new state events, so we re-calculate the auth events just in + # case. 
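+        # Note that we fetch the room's current forward extremities here rather
+        # than reusing the original event's prev_events, so the rebuilt event is
+        # ordered after any extra state events a module may have just sent.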
+ prev_event_ids = await self.store.get_prev_events_for_room(builder.room_id) + event = await builder.build( - prev_event_ids=original_event.prev_event_ids(), - auth_event_ids=original_event.auth_event_ids(), + prev_event_ids=prev_event_ids, + auth_event_ids=None, ) # we rebuild the event context, to be on the safe side. If nothing else, -- cgit 1.4.1 From aa7806486960f501d72917f1a90a36cdc8035a05 Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Thu, 8 Jul 2021 14:27:12 +0100 Subject: Minor changes to `user_daily_visits` (#10324) * Use fake time in tests in _get_start_of_day. * Change the inequality of last_seen in user_daily_visits Co-authored-by: Erik Johnston --- changelog.d/10324.misc | 1 + synapse/storage/databases/main/metrics.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10324.misc diff --git a/changelog.d/10324.misc b/changelog.d/10324.misc new file mode 100644 index 0000000000..3c3ee6d6fc --- /dev/null +++ b/changelog.d/10324.misc @@ -0,0 +1 @@ +Minor change to the code that populates `user_daily_visits`. diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index c3f551d377..e3a544d9b2 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -320,7 +320,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): """ Returns millisecond unixtime for start of UTC day. """ - now = time.gmtime() + now = time.gmtime(self._clock.time()) today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0)) return today_start * 1000 @@ -352,7 +352,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): ) udv ON u.user_id = udv.user_id AND u.device_id=udv.device_id INNER JOIN users ON users.name=u.user_id - WHERE last_seen > ? AND last_seen <= ? + WHERE ? <= last_seen AND last_seen < ? AND udv.timestamp IS NULL AND users.is_guest=0 AND users.appservice_id IS NULL GROUP BY u.user_id, u.device_id -- cgit 1.4.1 From f6767abc054f3461cd9a70ba096fcf9a8e640edb Mon Sep 17 00:00:00 2001 From: Cristina Date: Thu, 8 Jul 2021 10:57:13 -0500 Subject: Remove functionality associated with unused historical stats tables (#9721) Fixes #9602 --- changelog.d/9721.removal | 1 + docs/room_and_user_statistics.md | 50 +---- docs/sample_config.yaml | 5 - synapse/config/stats.py | 9 - synapse/handlers/stats.py | 27 --- synapse/storage/databases/main/purge_events.py | 1 - synapse/storage/databases/main/stats.py | 291 +------------------------ synapse/storage/schema/__init__.py | 6 +- tests/handlers/test_stats.py | 203 +---------------- tests/rest/admin/test_room.py | 1 - 10 files changed, 22 insertions(+), 572 deletions(-) create mode 100644 changelog.d/9721.removal diff --git a/changelog.d/9721.removal b/changelog.d/9721.removal new file mode 100644 index 0000000000..da2ba48c84 --- /dev/null +++ b/changelog.d/9721.removal @@ -0,0 +1 @@ +Remove functionality associated with the unused `room_stats_historical` and `user_stats_historical` tables. Contributed by @xmunoz. diff --git a/docs/room_and_user_statistics.md b/docs/room_and_user_statistics.md index e1facb38d4..cc38c890bb 100644 --- a/docs/room_and_user_statistics.md +++ b/docs/room_and_user_statistics.md @@ -1,9 +1,9 @@ Room and User Statistics ======================== -Synapse maintains room and user statistics (as well as a cache of room state), -in various tables. 
These can be used for administrative purposes but are also -used when generating the public room directory. +Synapse maintains room and user statistics in various tables. These can be used +for administrative purposes but are also used when generating the public room +directory. # Synapse Developer Documentation @@ -15,48 +15,8 @@ used when generating the public room directory. * **subject**: Something we are tracking stats about – currently a room or user. * **current row**: An entry for a subject in the appropriate current statistics table. Each subject can have only one. -* **historical row**: An entry for a subject in the appropriate historical - statistics table. Each subject can have any number of these. ### Overview -Stats are maintained as time series. There are two kinds of column: - -* absolute columns – where the value is correct for the time given by `end_ts` - in the stats row. (Imagine a line graph for these values) - * They can also be thought of as 'gauges' in Prometheus, if you are familiar. -* per-slice columns – where the value corresponds to how many of the occurrences - occurred within the time slice given by `(end_ts − bucket_size)…end_ts` - or `start_ts…end_ts`. (Imagine a histogram for these values) - -Stats are maintained in two tables (for each type): current and historical. - -Current stats correspond to the present values. Each subject can only have one -entry. - -Historical stats correspond to values in the past. Subjects may have multiple -entries. - -## Concepts around the management of stats - -### Current rows - -Current rows contain the most up-to-date statistics for a room. -They only contain absolute columns - -### Historical rows - -Historical rows can always be considered to be valid for the time slice and -end time specified. - -* historical rows will not exist for every time slice – they will be omitted - if there were no changes. In this case, the following assumptions can be - made to interpolate/recreate missing rows: - - absolute fields have the same values as in the preceding row - - per-slice fields are zero (`0`) -* historical rows will not be retained forever – rows older than a configurable - time will be purged. - -#### Purge - -The purging of historical rows is not yet implemented. +Stats correspond to the present values. Current rows contain the most up-to-date +statistics for a room. Each subject can only have one entry. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 71463168e3..cbbe7d58d9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2652,11 +2652,6 @@ stats: # #enabled: false - # The size of each timeslice in the room_stats_historical and - # user_stats_historical tables, as a time period. Defaults to "1d". - # - #bucket_size: 1h - # Server Notices room configuration # diff --git a/synapse/config/stats.py b/synapse/config/stats.py index 78f61fe9da..6f253e00c0 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -38,13 +38,9 @@ class StatsConfig(Config): def read_config(self, config, **kwargs): self.stats_enabled = True - self.stats_bucket_size = 86400 * 1000 stats_config = config.get("stats", None) if stats_config: self.stats_enabled = stats_config.get("enabled", self.stats_enabled) - self.stats_bucket_size = self.parse_duration( - stats_config.get("bucket_size", "1d") - ) if not self.stats_enabled: logger.warning(ROOM_STATS_DISABLED_WARN) @@ -59,9 +55,4 @@ class StatsConfig(Config): # correctly. 
# #enabled: false - - # The size of each timeslice in the room_stats_historical and - # user_stats_historical tables, as a time period. Defaults to "1d". - # - #bucket_size: 1h """ diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 4e45d1da57..814d08efcb 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -45,7 +45,6 @@ class StatsHandler: self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - self.stats_bucket_size = hs.config.stats_bucket_size self.stats_enabled = hs.config.stats_enabled @@ -106,20 +105,6 @@ class StatsHandler: room_deltas = {} user_deltas = {} - # Then count deltas for total_events and total_event_bytes. - ( - room_count, - user_count, - ) = await self.store.get_changes_room_total_events_and_bytes( - self.pos, max_pos - ) - - for room_id, fields in room_count.items(): - room_deltas.setdefault(room_id, Counter()).update(fields) - - for user_id, fields in user_count.items(): - user_deltas.setdefault(user_id, Counter()).update(fields) - logger.debug("room_deltas: %s", room_deltas) logger.debug("user_deltas: %s", user_deltas) @@ -181,12 +166,10 @@ class StatsHandler: event_content = {} # type: JsonDict - sender = None if event_id is not None: event = await self.store.get_event(event_id, allow_none=True) if event: event_content = event.content or {} - sender = event.sender # All the values in this dict are deltas (RELATIVE changes) room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter()) @@ -244,12 +227,6 @@ class StatsHandler: room_stats_delta["joined_members"] += 1 elif membership == Membership.INVITE: room_stats_delta["invited_members"] += 1 - - if sender and self.is_mine_id(sender): - user_to_stats_deltas.setdefault(sender, Counter())[ - "invites_sent" - ] += 1 - elif membership == Membership.LEAVE: room_stats_delta["left_members"] += 1 elif membership == Membership.BAN: @@ -279,10 +256,6 @@ class StatsHandler: room_state["is_federatable"] = ( event_content.get("m.federate", True) is True ) - if sender and self.is_mine_id(sender): - user_to_stats_deltas.setdefault(sender, Counter())[ - "rooms_created" - ] += 1 elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 7fb7780d0f..ec6b1eb5d4 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -392,7 +392,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "room_memberships", "room_stats_state", "room_stats_current", - "room_stats_historical", "room_stats_earliest_token", "rooms", "stream_ordering_to_exterm", diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 82a1833509..b10bee6daf 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -26,7 +26,6 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import StoreError from synapse.storage.database import DatabasePool from synapse.storage.databases.main.state_deltas import StateDeltasStore -from synapse.storage.engines import PostgresEngine from synapse.types import JsonDict from synapse.util.caches.descriptors import cached @@ -49,14 +48,6 @@ ABSOLUTE_STATS_FIELDS = { "user": ("joined_rooms",), } -# these fields are per-timeslice and so should be reset to 0 upon a new 
slice -# You can draw these stats on a histogram. -# Example: number of events sent locally during a time slice -PER_SLICE_FIELDS = { - "room": ("total_events", "total_event_bytes"), - "user": ("invites_sent", "rooms_created", "total_events", "total_event_bytes"), -} - TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")} # these are the tables (& ID columns) which contain our actual subjects @@ -106,7 +97,6 @@ class StatsStore(StateDeltasStore): self.server_name = hs.hostname self.clock = self.hs.get_clock() self.stats_enabled = hs.config.stats_enabled - self.stats_bucket_size = hs.config.stats_bucket_size self.stats_delta_processing_lock = DeferredLock() @@ -122,22 +112,6 @@ class StatsStore(StateDeltasStore): self.db_pool.updates.register_noop_background_update("populate_stats_cleanup") self.db_pool.updates.register_noop_background_update("populate_stats_prepare") - def quantise_stats_time(self, ts): - """ - Quantises a timestamp to be a multiple of the bucket size. - - Args: - ts (int): the timestamp to quantise, in milliseconds since the Unix - Epoch - - Returns: - int: a timestamp which - - is divisible by the bucket size; - - is no later than `ts`; and - - is the largest such timestamp. - """ - return (ts // self.stats_bucket_size) * self.stats_bucket_size - async def _populate_stats_process_users(self, progress, batch_size): """ This is a background update which regenerates statistics for users. @@ -288,56 +262,6 @@ class StatsStore(StateDeltasStore): desc="update_room_state", ) - async def get_statistics_for_subject( - self, stats_type: str, stats_id: str, start: str, size: int = 100 - ) -> List[dict]: - """ - Get statistics for a given subject. - - Args: - stats_type: The type of subject - stats_id: The ID of the subject (e.g. room_id or user_id) - start: Pagination start. Number of entries, not timestamp. - size: How many entries to return. - - Returns: - A list of dicts, where the dict has the keys of - ABSOLUTE_STATS_FIELDS[stats_type], and "bucket_size" and "end_ts". - """ - return await self.db_pool.runInteraction( - "get_statistics_for_subject", - self._get_statistics_for_subject_txn, - stats_type, - stats_id, - start, - size, - ) - - def _get_statistics_for_subject_txn( - self, txn, stats_type, stats_id, start, size=100 - ): - """ - Transaction-bound version of L{get_statistics_for_subject}. 
- """ - - table, id_col = TYPE_TO_TABLE[stats_type] - selected_columns = list( - ABSOLUTE_STATS_FIELDS[stats_type] + PER_SLICE_FIELDS[stats_type] - ) - - slice_list = self.db_pool.simple_select_list_paginate_txn( - txn, - table + "_historical", - "end_ts", - start, - size, - retcols=selected_columns + ["bucket_size", "end_ts"], - keyvalues={id_col: stats_id}, - order_direction="DESC", - ) - - return slice_list - @cached() async def get_earliest_token_for_stats( self, stats_type: str, id: str @@ -451,14 +375,10 @@ class StatsStore(StateDeltasStore): table, id_col = TYPE_TO_TABLE[stats_type] - quantised_ts = self.quantise_stats_time(int(ts)) - end_ts = quantised_ts + self.stats_bucket_size - # Lets be paranoid and check that all the given field names are known abs_field_names = ABSOLUTE_STATS_FIELDS[stats_type] - slice_field_names = PER_SLICE_FIELDS[stats_type] for field in chain(fields.keys(), absolute_field_overrides.keys()): - if field not in abs_field_names and field not in slice_field_names: + if field not in abs_field_names: # guard against potential SQL injection dodginess raise ValueError( "%s is not a recognised field" @@ -491,20 +411,6 @@ class StatsStore(StateDeltasStore): additive_relatives=deltas_of_absolute_fields, ) - per_slice_additive_relatives = { - key: fields.get(key, 0) for key in slice_field_names - } - self._upsert_copy_from_table_with_additive_relatives_txn( - txn=txn, - into_table=table + "_historical", - keyvalues={id_col: stats_id}, - extra_dst_insvalues={"bucket_size": self.stats_bucket_size}, - extra_dst_keyvalues={"end_ts": end_ts}, - additive_relatives=per_slice_additive_relatives, - src_table=table + "_current", - copy_columns=abs_field_names, - ) - def _upsert_with_additive_relatives_txn( self, txn, table, keyvalues, absolutes, additive_relatives ): @@ -572,201 +478,6 @@ class StatsStore(StateDeltasStore): current_row.update(absolutes) self.db_pool.simple_update_one_txn(txn, table, keyvalues, current_row) - def _upsert_copy_from_table_with_additive_relatives_txn( - self, - txn, - into_table, - keyvalues, - extra_dst_keyvalues, - extra_dst_insvalues, - additive_relatives, - src_table, - copy_columns, - ): - """Updates the historic stats table with latest updates. - - This involves copying "absolute" fields from the `_current` table, and - adding relative fields to any existing values. - - Args: - txn: Transaction - into_table (str): The destination table to UPSERT the row into - keyvalues (dict[str, any]): Row-identifying key values - extra_dst_keyvalues (dict[str, any]): Additional keyvalues - for `into_table`. - extra_dst_insvalues (dict[str, any]): Additional values to insert - on new row creation for `into_table`. - additive_relatives (dict[str, any]): Fields that will be added onto - if existing row present. (Must be disjoint from copy_columns.) - src_table (str): The source table to copy from - copy_columns (iterable[str]): The list of columns to copy - """ - if self.database_engine.can_native_upsert: - ins_columns = chain( - keyvalues, - copy_columns, - additive_relatives, - extra_dst_keyvalues, - extra_dst_insvalues, - ) - sel_exprs = chain( - keyvalues, - copy_columns, - ( - "?" - for _ in chain( - additive_relatives, extra_dst_keyvalues, extra_dst_insvalues - ) - ), - ) - keyvalues_where = ("%s = ?" 
% f for f in keyvalues) - - sets_cc = ("%s = EXCLUDED.%s" % (f, f) for f in copy_columns) - sets_ar = ( - "%s = EXCLUDED.%s + %s.%s" % (f, f, into_table, f) - for f in additive_relatives - ) - - sql = """ - INSERT INTO %(into_table)s (%(ins_columns)s) - SELECT %(sel_exprs)s - FROM %(src_table)s - WHERE %(keyvalues_where)s - ON CONFLICT (%(keyvalues)s) - DO UPDATE SET %(sets)s - """ % { - "into_table": into_table, - "ins_columns": ", ".join(ins_columns), - "sel_exprs": ", ".join(sel_exprs), - "keyvalues_where": " AND ".join(keyvalues_where), - "src_table": src_table, - "keyvalues": ", ".join( - chain(keyvalues.keys(), extra_dst_keyvalues.keys()) - ), - "sets": ", ".join(chain(sets_cc, sets_ar)), - } - - qargs = list( - chain( - additive_relatives.values(), - extra_dst_keyvalues.values(), - extra_dst_insvalues.values(), - keyvalues.values(), - ) - ) - txn.execute(sql, qargs) - else: - self.database_engine.lock_table(txn, into_table) - src_row = self.db_pool.simple_select_one_txn( - txn, src_table, keyvalues, copy_columns - ) - all_dest_keyvalues = {**keyvalues, **extra_dst_keyvalues} - dest_current_row = self.db_pool.simple_select_one_txn( - txn, - into_table, - keyvalues=all_dest_keyvalues, - retcols=list(chain(additive_relatives.keys(), copy_columns)), - allow_none=True, - ) - - if dest_current_row is None: - merged_dict = { - **keyvalues, - **extra_dst_keyvalues, - **extra_dst_insvalues, - **src_row, - **additive_relatives, - } - self.db_pool.simple_insert_txn(txn, into_table, merged_dict) - else: - for (key, val) in additive_relatives.items(): - src_row[key] = dest_current_row[key] + val - self.db_pool.simple_update_txn( - txn, into_table, all_dest_keyvalues, src_row - ) - - async def get_changes_room_total_events_and_bytes( - self, min_pos: int, max_pos: int - ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]: - """Fetches the counts of events in the given range of stream IDs. - - Args: - min_pos - max_pos - - Returns: - Mapping of room ID to field changes. - """ - - return await self.db_pool.runInteraction( - "stats_incremental_total_events_and_bytes", - self.get_changes_room_total_events_and_bytes_txn, - min_pos, - max_pos, - ) - - def get_changes_room_total_events_and_bytes_txn( - self, txn, low_pos: int, high_pos: int - ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]: - """Gets the total_events and total_event_bytes counts for rooms and - senders, in a range of stream_orderings (including backfilled events). - - Args: - txn - low_pos: Low stream ordering - high_pos: High stream ordering - - Returns: - The room and user deltas for total_events/total_event_bytes in the - format of `stats_id` -> fields - """ - - if low_pos >= high_pos: - # nothing to do here. - return {}, {} - - if isinstance(self.database_engine, PostgresEngine): - new_bytes_expression = "OCTET_LENGTH(json)" - else: - new_bytes_expression = "LENGTH(CAST(json AS BLOB))" - - sql = """ - SELECT events.room_id, COUNT(*) AS new_events, SUM(%s) AS new_bytes - FROM events INNER JOIN event_json USING (event_id) - WHERE (? < stream_ordering AND stream_ordering <= ?) - OR (? <= stream_ordering AND stream_ordering <= ?) 
- GROUP BY events.room_id - """ % ( - new_bytes_expression, - ) - - txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos)) - - room_deltas = { - room_id: {"total_events": new_events, "total_event_bytes": new_bytes} - for room_id, new_events, new_bytes in txn - } - - sql = """ - SELECT events.sender, COUNT(*) AS new_events, SUM(%s) AS new_bytes - FROM events INNER JOIN event_json USING (event_id) - WHERE (? < stream_ordering AND stream_ordering <= ?) - OR (? <= stream_ordering AND stream_ordering <= ?) - GROUP BY events.sender - """ % ( - new_bytes_expression, - ) - - txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos)) - - user_deltas = { - user_id: {"total_events": new_events, "total_event_bytes": new_bytes} - for user_id, new_events, new_bytes in txn - if self.hs.is_mine_id(user_id) - } - - return room_deltas, user_deltas - async def _calculate_and_set_initial_state_for_room( self, room_id: str ) -> Tuple[dict, dict, int]: diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 0a53b73ccc..36340a652a 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 60 +SCHEMA_VERSION = 61 """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -21,6 +21,10 @@ older versions of Synapse). See `README.md `_ for more information on how this works. + +Changes in SCHEMA_VERSION = 61: + - The `user_stats_historical` and `room_stats_historical` tables are not written and + are not read (previously, they were written but not read). """ diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index c9d4fd9336..e4059acda3 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -88,16 +88,12 @@ class StatsRoomTests(unittest.HomeserverTestCase): def _get_current_stats(self, stats_type, stat_id): table, id_col = stats.TYPE_TO_TABLE[stats_type] - cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list( - stats.PER_SLICE_FIELDS[stats_type] - ) - - end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000) + cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type]) return self.get_success( self.store.db_pool.simple_select_one( - table + "_historical", - {id_col: stat_id, end_ts: end_ts}, + table + "_current", + {id_col: stat_id}, cols, allow_none=True, ) @@ -156,115 +152,6 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.assertEqual(len(r), 1) self.assertEqual(r[0]["topic"], "foo") - def test_initial_earliest_token(self): - """ - Ingestion via notify_new_event will ignore tokens that the background - update have already processed. - """ - - self.reactor.advance(86401) - - self.hs.config.stats_enabled = False - self.handler.stats_enabled = False - - u1 = self.register_user("u1", "pass") - u1_token = self.login("u1", "pass") - - u2 = self.register_user("u2", "pass") - u2_token = self.login("u2", "pass") - - u3 = self.register_user("u3", "pass") - u3_token = self.login("u3", "pass") - - room_1 = self.helper.create_room_as(u1, tok=u1_token) - self.helper.send_state( - room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token - ) - - # Begin the ingestion by creating the temp tables. This will also store - # the position that the deltas should begin at, once they take over. 
- self.hs.config.stats_enabled = True - self.handler.stats_enabled = True - self.store.db_pool.updates._all_done = False - self.get_success( - self.store.db_pool.simple_update_one( - table="stats_incremental_position", - keyvalues={}, - updatevalues={"stream_id": 0}, - ) - ) - - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - {"update_name": "populate_stats_prepare", "progress_json": "{}"}, - ) - ) - - while not self.get_success( - self.store.db_pool.updates.has_completed_background_updates() - ): - self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 - ) - - # Now, before the table is actually ingested, add some more events. - self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token) - self.helper.join(room=room_1, user=u2, tok=u2_token) - - # orig_delta_processor = self.store. - - # Now do the initial ingestion. - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - {"update_name": "populate_stats_process_rooms", "progress_json": "{}"}, - ) - ) - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - { - "update_name": "populate_stats_cleanup", - "progress_json": "{}", - "depends_on": "populate_stats_process_rooms", - }, - ) - ) - - self.store.db_pool.updates._all_done = False - while not self.get_success( - self.store.db_pool.updates.has_completed_background_updates() - ): - self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 - ) - - self.reactor.advance(86401) - - # Now add some more events, triggering ingestion. Because of the stream - # position being set to before the events sent in the middle, a simpler - # implementation would reprocess those events, and say there were four - # users, not three. - self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token) - self.helper.join(room=room_1, user=u3, tok=u3_token) - - # self.handler.notify_new_event() - - # We need to let the delta processor advance… - self.reactor.advance(10 * 60) - - # Get the slices! There should be two -- day 1, and day 2. - r = self.get_success(self.store.get_statistics_for_subject("room", room_1, 0)) - - self.assertEqual(len(r), 2) - - # The oldest has 2 joined members - self.assertEqual(r[-1]["joined_members"], 2) - - # The newest has 3 - self.assertEqual(r[0]["joined_members"], 3) - def test_create_user(self): """ When we create a user, it should have statistics already ready. @@ -296,22 +183,6 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.assertIsNotNone(r1stats) self.assertIsNotNone(r2stats) - # contains the default things you'd expect in a fresh room - self.assertEqual( - r1stats["total_events"], - EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM, - "Wrong number of total_events in new room's stats!" - " You may need to update this if more state events are added to" - " the room creation process.", - ) - self.assertEqual( - r2stats["total_events"], - EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM, - "Wrong number of total_events in new room's stats!" - " You may need to update this if more state events are added to" - " the room creation process.", - ) - self.assertEqual( r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM ) @@ -327,24 +198,6 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.assertEqual(r2stats["invited_members"], 0) self.assertEqual(r2stats["banned_members"], 0) - def test_send_message_increments_total_events(self): - """ - When we send a message, it increments total_events. 
- """ - - self._perform_background_initial_update() - - u1 = self.register_user("u1", "pass") - u1token = self.login("u1", "pass") - r1 = self.helper.create_room_as(u1, tok=u1token) - r1stats_ante = self._get_current_stats("room", r1) - - self.helper.send(r1, "hiss", tok=u1token) - - r1stats_post = self._get_current_stats("room", r1) - - self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) - def test_updating_profile_information_does_not_increase_joined_members_count(self): """ Check that the joined_members count does not increase when a user changes their @@ -378,7 +231,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): def test_send_state_event_nonoverwriting(self): """ - When we send a non-overwriting state event, it increments total_events AND current_state_events + When we send a non-overwriting state event, it increments current_state_events """ self._perform_background_initial_update() @@ -399,44 +252,14 @@ class StatsRoomTests(unittest.HomeserverTestCase): r1stats_post = self._get_current_stats("room", r1) - self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) self.assertEqual( r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], 1, ) - def test_send_state_event_overwriting(self): - """ - When we send an overwriting state event, it increments total_events ONLY - """ - - self._perform_background_initial_update() - - u1 = self.register_user("u1", "pass") - u1token = self.login("u1", "pass") - r1 = self.helper.create_room_as(u1, tok=u1token) - - self.helper.send_state( - r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby" - ) - - r1stats_ante = self._get_current_stats("room", r1) - - self.helper.send_state( - r1, "cat.hissing", {"value": False}, tok=u1token, state_key="tabby" - ) - - r1stats_post = self._get_current_stats("room", r1) - - self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) - self.assertEqual( - r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], - 0, - ) - def test_join_first_time(self): """ - When a user joins a room for the first time, total_events, current_state_events and + When a user joins a room for the first time, current_state_events and joined_members should increase by exactly 1. """ @@ -455,7 +278,6 @@ class StatsRoomTests(unittest.HomeserverTestCase): r1stats_post = self._get_current_stats("room", r1) - self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) self.assertEqual( r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], 1, @@ -466,7 +288,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): def test_join_after_leave(self): """ - When a user joins a room after being previously left, total_events and + When a user joins a room after being previously left, joined_members should increase by exactly 1. current_state_events should not increase. left_members should decrease by exactly 1. 
@@ -490,7 +312,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -504,7 +325,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_invited(self):
         """
-        When a user invites another user, current_state_events, total_events and
+        When a user invites another user, current_state_events and
         invited_members should increase by exactly 1.
         """
 
@@ -522,7 +343,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             1,
@@ -533,7 +353,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_join_after_invite(self):
         """
-        When a user joins a room after being invited, total_events and
+        When a user joins a room after being invited,
         joined_members should increase by exactly 1.
         current_state_events should not increase.
         invited_members should decrease by exactly 1.
@@ -556,7 +376,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -570,7 +389,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_left(self):
         """
-        When a user leaves a room after joining, total_events and
+        When a user leaves a room after joining,
         left_members should increase by exactly 1.
         current_state_events should not increase.
         joined_members should decrease by exactly 1.
@@ -593,7 +412,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -607,7 +425,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_banned(self):
         """
-        When a user is banned from a room after joining, total_events and
+        When a user is banned from a room after joining,
         banned_members should increase by exactly 1.
         current_state_events should not increase.
         joined_members should decrease by exactly 1.
@@ -630,7 +448,6 @@ class StatsRoomTests(unittest.HomeserverTestCase): r1stats_post = self._get_current_stats("room", r1) - self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1) self.assertEqual( r1stats_post["current_state_events"] - r1stats_ante["current_state_events"], 0, diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index ee071c2477..959d3cea77 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1753,7 +1753,6 @@ PURGE_TABLES = [ "room_memberships", "room_stats_state", "room_stats_current", - "room_stats_historical", "room_stats_earliest_token", "rooms", "stream_ordering_to_exterm", -- cgit 1.4.1 From 33ae301fee3aac6fec492b8238899cac22e3908d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 8 Jul 2021 18:16:30 +0200 Subject: Fix formatting in the logcontext doc (#10337) --- changelog.d/10337.doc | 1 + docs/log_contexts.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10337.doc diff --git a/changelog.d/10337.doc b/changelog.d/10337.doc new file mode 100644 index 0000000000..f305bdb3ba --- /dev/null +++ b/changelog.d/10337.doc @@ -0,0 +1 @@ +Fix formatting in the logcontext documentation. diff --git a/docs/log_contexts.md b/docs/log_contexts.md index fe30ca2791..9a43d46091 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -17,7 +17,7 @@ class). Deferreds make the whole thing complicated, so this document describes how it all works, and how to write code which follows the rules. -##Logcontexts without Deferreds +## Logcontexts without Deferreds In the absence of any Deferred voodoo, things are simple enough. As with any code of this nature, the rule is that our function should leave -- cgit 1.4.1 From d26094e92cace20525552e5a0c8b21ff9ce53f11 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 8 Jul 2021 20:25:59 -0500 Subject: Add base starting insertion event when no chunk ID is provided (MSC2716) (#10250) * Add base starting insertion point when no chunk ID is provided This is so we can have the marker event point to this initial insertion event and be able to traverse the events in the first chunk. --- changelog.d/10250.bugfix | 1 + synapse/handlers/message.py | 8 +++ synapse/rest/client/v1/room.py | 112 ++++++++++++++++++++++++++++++++--------- 3 files changed, 98 insertions(+), 23 deletions(-) create mode 100644 changelog.d/10250.bugfix diff --git a/changelog.d/10250.bugfix b/changelog.d/10250.bugfix new file mode 100644 index 0000000000..a8107dafb2 --- /dev/null +++ b/changelog.d/10250.bugfix @@ -0,0 +1 @@ +Add base starting insertion event when no chunk ID is specified in the historical batch send API. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b960e18c4c..e06655f3d4 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -518,6 +518,9 @@ class EventCreationHandler: outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted + back in time around some existing events. This is used to skip + a few checks and mark the event as backfilled. depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. 
@@ -772,6 +775,7 @@ class EventCreationHandler:
         txn_id: Optional[str] = None,
         ignore_shadow_ban: bool = False,
         outlier: bool = False,
+        historical: bool = False,
         depth: Optional[int] = None,
     ) -> Tuple[EventBase, int]:
         """
@@ -799,6 +803,9 @@ class EventCreationHandler:
             outlier: Indicates whether the event is an `outlier`, i.e. if it's from
                 an arbitrary point and floating in the DAG as opposed to being inline
                 with the current DAG.
+            historical: Indicates whether the message is being inserted
+                back in time around some existing events. This is used to skip
+                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG. Should
                 normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -847,6 +854,7 @@ class EventCreationHandler:
             prev_event_ids=prev_event_ids,
             auth_event_ids=auth_event_ids,
             outlier=outlier,
+            historical=historical,
             depth=depth,
         )
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 92ebe838fd..9c58e3689e 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -349,6 +349,35 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
 
         return depth
 
+    def _create_insertion_event_dict(
+        self, sender: str, room_id: str, origin_server_ts: int
+    ):
+        """Creates an event dict for an "insertion" event with the proper fields
+        and a random chunk ID.
+
+        Args:
+            sender: The event author MXID
+            room_id: The room ID that the event belongs to
+            origin_server_ts: Timestamp when the event was sent
+
+        Returns:
+            The new insertion event dictionary, ready to be sent into the room
+        """
+
+        next_chunk_id = random_string(8)
+        insertion_event = {
+            "type": EventTypes.MSC2716_INSERTION,
+            "sender": sender,
+            "room_id": room_id,
+            "content": {
+                EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
+                EventContentFields.MSC2716_HISTORICAL: True,
+            },
+            "origin_server_ts": origin_server_ts,
+        }
+
+        return insertion_event
+
     async def on_POST(self, request, room_id):
         requester = await self.auth.get_user_by_req(request, allow_guest=False)
 
@@ -449,37 +478,68 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
 
         events_to_create = body["events"]
 
-        # If provided, connect the chunk to the last insertion point
-        # The chunk ID passed in comes from the chunk_id in the
-        # "insertion" event from the previous chunk.
+        prev_event_ids = prev_events_from_query
+        inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query)
+
+        # Figure out which chunk to connect to. If they passed in
+        # chunk_id_from_query let's use it. The chunk ID passed in comes
+        # from the chunk_id in the "insertion" event from the previous chunk.
+        last_event_in_chunk = events_to_create[-1]
+        chunk_id_to_connect_to = chunk_id_from_query
+        base_insertion_event = None
         if chunk_id_from_query:
-            last_event_in_chunk = events_to_create[-1]
-            last_event_in_chunk["content"][
-                EventContentFields.MSC2716_CHUNK_ID
-            ] = chunk_id_from_query
+            # TODO: Verify the chunk_id_from_query corresponds to an insertion event
+            pass
+        # Otherwise, create an insertion event to act as a starting point.
+        #
+        # We don't always have an insertion event to start hanging more history
+        # off of (ideally there would be one in the main DAG, but that's not the
+        # case if we're wanting to add history to e.g. existing rooms without
+        # an insertion event), in which case we just create a new insertion event
+        # that can then get pointed to by a "marker" event later.
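+        # (The chunk ID generated inside this new insertion event is what
+        # last_event_in_chunk gets connected to just below.)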
+ else: + base_insertion_event_dict = self._create_insertion_event_dict( + sender=requester.user.to_string(), + room_id=room_id, + origin_server_ts=last_event_in_chunk["origin_server_ts"], + ) + base_insertion_event_dict["prev_events"] = prev_event_ids.copy() - # Add an "insertion" event to the start of each chunk (next to the oldest + ( + base_insertion_event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( + requester, + base_insertion_event_dict, + prev_event_ids=base_insertion_event_dict.get("prev_events"), + auth_event_ids=auth_event_ids, + historical=True, + depth=inherited_depth, + ) + + chunk_id_to_connect_to = base_insertion_event["content"][ + EventContentFields.MSC2716_NEXT_CHUNK_ID + ] + + # Connect this current chunk to the insertion event from the previous chunk + last_event_in_chunk["content"][ + EventContentFields.MSC2716_CHUNK_ID + ] = chunk_id_to_connect_to + + # Add an "insertion" event to the start of each chunk (next to the oldest-in-time # event in the chunk) so the next chunk can be connected to this one. - next_chunk_id = random_string(64) - insertion_event = { - "type": EventTypes.MSC2716_INSERTION, - "sender": requester.user.to_string(), - "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, - EventContentFields.MSC2716_HISTORICAL: True, - }, + insertion_event = self._create_insertion_event_dict( + sender=requester.user.to_string(), + room_id=room_id, # Since the insertion event is put at the start of the chunk, - # where the oldest event is, copy the origin_server_ts from + # where the oldest-in-time event is, copy the origin_server_ts from # the first event we're inserting - "origin_server_ts": events_to_create[0]["origin_server_ts"], - } + origin_server_ts=events_to_create[0]["origin_server_ts"], + ) # Prepend the insertion event to the start of the chunk events_to_create = [insertion_event] + events_to_create - inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query) - event_ids = [] - prev_event_ids = prev_events_from_query events_to_persist = [] for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) @@ -533,10 +593,16 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet): context=context, ) + # Add the base_insertion_event to the bottom of the list we return + if base_insertion_event is not None: + event_ids.append(base_insertion_event.event_id) + return 200, { "state_events": auth_event_ids, "events": event_ids, - "next_chunk_id": next_chunk_id, + "next_chunk_id": insertion_event["content"][ + EventContentFields.MSC2716_NEXT_CHUNK_ID + ], } def on_GET(self, request, room_id): -- cgit 1.4.1
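
A note on the `user_daily_visits` change (#10324) above: the query now selects
with a half-open window, `? <= last_seen AND last_seen < ?`, where it previously
used `last_seen > ? AND last_seen <= ?`. The sketch below is plain Python, not
Synapse code, and the day-bucket helper is invented for illustration; it shows
how half-open buckets assign every timestamp, including one that lands exactly
on a day boundary, to exactly one bucket.

from typing import List

DAY_MS = 24 * 60 * 60 * 1000


def bucket_for(last_seen_ms: int, day_starts_ms: List[int]) -> int:
    """Return the index of the day bucket containing last_seen_ms."""
    for i, start in enumerate(day_starts_ms):
        # Half-open window, matching "? <= last_seen AND last_seen < ?"
        if start <= last_seen_ms < start + DAY_MS:
            return i
    raise ValueError("timestamp outside all buckets")


day_starts = [0, DAY_MS]  # two consecutive UTC days

# A visit logged exactly at midnight falls in the new day's bucket only; under
# an "(start, end]" window it would have been attributed to the day before.
assert bucket_for(DAY_MS, day_starts) == 1
assert bucket_for(DAY_MS - 1, day_starts) == 0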
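
Similarly, for the MSC2716 batch-send changes (#10250) above: each chunk of
historical events gets an "insertion" event prepended at its oldest edge,
carrying a fresh chunk ID, and the next (older) chunk's newest event points
back at it. The following self-contained sketch approximates that chaining;
`link_chunk`, the `next_chunk_id`/`chunk_id` key names and the event shapes
are illustrative stand-ins for the servlet logic and the
`EventContentFields.MSC2716_*` constants, not Synapse's actual API.

import secrets
from typing import Dict, List, Optional, Tuple

NEXT_CHUNK_ID = "next_chunk_id"  # stand-in key names; the real values live in Synapse
CHUNK_ID = "chunk_id"


def link_chunk(
    events: List[Dict], chunk_id_to_connect_to: Optional[str]
) -> Tuple[List[Dict], str]:
    """Prepend an insertion event and connect this chunk to the previous one.

    `events` is ordered oldest-first, mirroring `events_to_create` above. The
    returned next_chunk_id is what the following (older) request passes back in.
    """
    if chunk_id_to_connect_to is not None:
        # The newest event of this chunk points at the previous chunk's
        # insertion event, like last_event_in_chunk in the servlet.
        events[-1].setdefault("content", {})[CHUNK_ID] = chunk_id_to_connect_to

    next_chunk_id = secrets.token_urlsafe(8)  # stands in for random_string(8)
    insertion = {
        "type": "insertion",  # stands in for EventTypes.MSC2716_INSERTION
        "content": {NEXT_CHUNK_ID: next_chunk_id},
        # Placed at the oldest edge of the chunk, so it copies the first
        # event's timestamp, as the servlet does.
        "origin_server_ts": events[0]["origin_server_ts"],
    }
    return [insertion] + events, next_chunk_id


# Walking backwards through history, one chunk per request:
chunk_a = [{"content": {}, "origin_server_ts": 2000}]  # more recent history
chunk_b = [{"content": {}, "origin_server_ts": 1000}]  # older history

linked_a, next_id = link_chunk(chunk_a, None)  # first request: no chunk ID yet
linked_b, _ = link_chunk(chunk_b, next_id)  # second request connects to the first

assert linked_b[-1]["content"][CHUNK_ID] == linked_a[0]["content"][NEXT_CHUNK_ID]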