summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2021-02-22 12:55:32 +0000
committerErik Johnston <erik@matrix.org>2021-02-22 12:55:32 +0000
commit5d405f7e7a292691e30a33a762b7834d9854209b (patch)
tree40863c9e510ac4ad8aa2eada7bbdab6c9760cb27
parentMerge remote-tracking branch 'origin/release-v1.28.0' into matrix-org-hotfixes (diff)
parentClean up the user directory sample config section (#9385) (diff)
downloadsynapse-5d405f7e7a292691e30a33a762b7834d9854209b.tar.xz
Merge remote-tracking branch 'origin/develop' into matrix-org-hotfixes
-rw-r--r--CHANGES.md2
-rw-r--r--changelog.d/8957.feature1
-rw-r--r--changelog.d/9203.feature1
-rw-r--r--changelog.d/9383.feature1
-rw-r--r--changelog.d/9385.feature1
-rw-r--r--changelog.d/9402.bugfix1
-rw-r--r--changelog.d/9432.misc1
-rw-r--r--changelog.d/9438.feature1
-rw-r--r--changelog.d/9440.bugfix1
-rw-r--r--docs/sample_config.yaml56
-rw-r--r--synapse/api/constants.py7
-rw-r--r--synapse/api/ratelimiting.py10
-rw-r--r--synapse/config/_base.py17
-rw-r--r--synapse/config/federation.py10
-rw-r--r--synapse/config/ratelimiting.py10
-rw-r--r--synapse/config/server.py14
-rw-r--r--synapse/config/user_directory.py60
-rw-r--r--synapse/federation/federation_server.py20
-rw-r--r--synapse/federation/sender/__init__.py2
-rw-r--r--synapse/federation/transport/server.py7
-rw-r--r--synapse/handlers/devicemessage.py24
-rw-r--r--synapse/handlers/events.py4
-rw-r--r--synapse/handlers/initial_sync.py4
-rw-r--r--synapse/handlers/message.py8
-rw-r--r--synapse/handlers/presence.py56
-rw-r--r--synapse/handlers/profile.py9
-rw-r--r--synapse/replication/http/federation.py3
-rw-r--r--synapse/res/templates/sso_auth_account_details.html2
-rw-r--r--synapse/rest/client/v2_alpha/sendtodevice.py4
-rw-r--r--synapse/rest/media/v1/media_repository.py2
-rw-r--r--synapse/rest/media/v1/media_storage.py2
-rw-r--r--synapse/rest/media/v1/thumbnail_resource.py56
-rw-r--r--synapse/storage/databases/main/media_repository.py18
-rw-r--r--synapse/storage/databases/main/user_directory.py59
-rw-r--r--tests/handlers/test_presence.py14
-rw-r--r--tests/handlers/test_profile.py6
-rw-r--r--tests/handlers/test_user_directory.py97
-rw-r--r--tests/rest/media/v1/test_media_storage.py69
38 files changed, 551 insertions, 109 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 67ebf86d6d..c216d28818 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,4 +1,4 @@
-Synapse 1.28.0rc1 (2021-02-18)
+Synapse 1.28.0rc1 (2021-02-19)
 ==============================
 
 Note that this release drops support for ARMv7 in the official Docker images, due to repeated problems building for ARMv7 (and the associated maintenance burden this entails).
diff --git a/changelog.d/8957.feature b/changelog.d/8957.feature
new file mode 100644
index 0000000000..fa8961f840
--- /dev/null
+++ b/changelog.d/8957.feature
@@ -0,0 +1 @@
+Add rate limiters to cross-user key sharing requests.
diff --git a/changelog.d/9203.feature b/changelog.d/9203.feature
new file mode 100644
index 0000000000..36b66a47a8
--- /dev/null
+++ b/changelog.d/9203.feature
@@ -0,0 +1 @@
+Add some configuration settings to make users' profile data more private.
diff --git a/changelog.d/9383.feature b/changelog.d/9383.feature
new file mode 100644
index 0000000000..8957c9cc5e
--- /dev/null
+++ b/changelog.d/9383.feature
@@ -0,0 +1 @@
+Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users.
\ No newline at end of file
diff --git a/changelog.d/9385.feature b/changelog.d/9385.feature
new file mode 100644
index 0000000000..cbe3922de8
--- /dev/null
+++ b/changelog.d/9385.feature
@@ -0,0 +1 @@
+ Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users.
\ No newline at end of file
diff --git a/changelog.d/9402.bugfix b/changelog.d/9402.bugfix
new file mode 100644
index 0000000000..7729225ba2
--- /dev/null
+++ b/changelog.d/9402.bugfix
@@ -0,0 +1 @@
+Fix a bug where a lot of unnecessary presence updates were sent when joining a room.
diff --git a/changelog.d/9432.misc b/changelog.d/9432.misc
new file mode 100644
index 0000000000..1e07da2033
--- /dev/null
+++ b/changelog.d/9432.misc
@@ -0,0 +1 @@
+Add documentation and type hints to `parse_duration`.
diff --git a/changelog.d/9438.feature b/changelog.d/9438.feature
new file mode 100644
index 0000000000..a1f6be5563
--- /dev/null
+++ b/changelog.d/9438.feature
@@ -0,0 +1 @@
+Add support for regenerating thumbnails if they have been deleted but the original image is still stored.
diff --git a/changelog.d/9440.bugfix b/changelog.d/9440.bugfix
new file mode 100644
index 0000000000..47b9842b37
--- /dev/null
+++ b/changelog.d/9440.bugfix
@@ -0,0 +1 @@
+Fix bug introduced in v1.27.0 where allowing a user to choose their own username when logging in via single sign-on did not work unless an `idp_icon` was defined.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 52380dfb04..4dbef41b7e 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -101,6 +101,14 @@ pid_file: DATADIR/homeserver.pid
 #
 #limit_profile_requests_to_users_who_share_rooms: true
 
+# Uncomment to prevent a user's profile data from being retrieved and
+# displayed in a room until they have joined it. By default, a user's
+# profile data is included in an invite event, regardless of the values
+# of the above two settings, and whether or not the users share a server.
+# Defaults to 'true'.
+#
+#include_profile_data_on_invite: false
+
 # If set to 'true', removes the need for authentication to access the server's
 # public rooms directory through the client API, meaning that anyone can
 # query the room directory. Defaults to 'false'.
@@ -699,6 +707,12 @@ acme:
 #  - matrix.org
 #  - example.com
 
+# Uncomment to disable profile lookup over federation. By default, the
+# Federation API allows other homeservers to obtain profile data of any user
+# on this homeserver. Defaults to 'true'.
+#
+#allow_profile_lookup_over_federation: false
+
 
 ## Caching ##
 
@@ -2530,19 +2544,35 @@ spam_checker:
 
 # User Directory configuration
 #
-# 'enabled' defines whether users can search the user directory. If
-# false then empty responses are returned to all queries. Defaults to
-# true.
-#
-# 'search_all_users' defines whether to search all users visible to your HS
-# when searching the user directory, rather than limiting to users visible
-# in public rooms.  Defaults to false.  If you set it True, you'll have to
-# rebuild the user_directory search indexes, see
-# https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
-#
-#user_directory:
-#  enabled: true
-#  search_all_users: false
+user_directory:
+    # Defines whether users can search the user directory. If false then
+    # empty responses are returned to all queries. Defaults to true.
+    #
+    # Uncomment to disable the user directory.
+    #
+    #enabled: false
+
+    # Defines whether to search all users visible to your HS when searching
+    # the user directory, rather than limiting to users visible in public
+    # rooms. Defaults to false.
+    #
+    # If you set it true, you'll have to rebuild the user_directory search
+    # indexes, see:
+    # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
+    #
+    # Uncomment to return search results containing all known users, even if that
+    # user does not share a room with the requester.
+    #
+    #search_all_users: true
+
+    # Defines whether to prefer local users in search query results.
+    # If True, local users are more likely to appear above remote users
+    # when searching the user directory. Defaults to false.
+    #
+    # Uncomment to prefer local over remote users in user directory search
+    # results.
+    #
+    #prefer_local_users: true
 
 
 # User Consent configuration
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index af8d59cf87..691f8f9adf 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -98,11 +98,14 @@ class EventTypes:
 
     Retention = "m.room.retention"
 
-    Presence = "m.presence"
-
     Dummy = "org.matrix.dummy_event"
 
 
+class EduTypes:
+    Presence = "m.presence"
+    RoomKeyRequest = "m.room_key_request"
+
+
 class RejectedReason:
     AUTH_ERROR = "auth_error"
 
diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py
index 5d9d5a228f..c3f07bc1a3 100644
--- a/synapse/api/ratelimiting.py
+++ b/synapse/api/ratelimiting.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from collections import OrderedDict
-from typing import Any, Optional, Tuple
+from typing import Hashable, Optional, Tuple
 
 from synapse.api.errors import LimitExceededError
 from synapse.types import Requester
@@ -42,7 +42,9 @@ class Ratelimiter:
         #   * How many times an action has occurred since a point in time
         #   * The point in time
         #   * The rate_hz of this particular entry. This can vary per request
-        self.actions = OrderedDict()  # type: OrderedDict[Any, Tuple[float, int, float]]
+        self.actions = (
+            OrderedDict()
+        )  # type: OrderedDict[Hashable, Tuple[float, int, float]]
 
     def can_requester_do_action(
         self,
@@ -82,7 +84,7 @@ class Ratelimiter:
 
     def can_do_action(
         self,
-        key: Any,
+        key: Hashable,
         rate_hz: Optional[float] = None,
         burst_count: Optional[int] = None,
         update: bool = True,
@@ -175,7 +177,7 @@ class Ratelimiter:
 
     def ratelimit(
         self,
-        key: Any,
+        key: Hashable,
         rate_hz: Optional[float] = None,
         burst_count: Optional[int] = None,
         update: bool = True,
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 97399eb9ba..e89decda34 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -21,7 +21,7 @@ import os
 from collections import OrderedDict
 from hashlib import sha256
 from textwrap import dedent
-from typing import Any, Iterable, List, MutableMapping, Optional
+from typing import Any, Iterable, List, MutableMapping, Optional, Union
 
 import attr
 import jinja2
@@ -147,7 +147,20 @@ class Config:
         return int(value) * size
 
     @staticmethod
-    def parse_duration(value):
+    def parse_duration(value: Union[str, int]) -> int:
+        """Convert a duration as a string or integer to a number of milliseconds.
+
+        If an integer is provided it is treated as milliseconds and is unchanged.
+
+        String durations can have a suffix of 's', 'm', 'h', 'd', 'w', or 'y'.
+        No suffix is treated as milliseconds.
+
+        Args:
+            value: The duration to parse.
+
+        Returns:
+            The number of milliseconds in the duration.
+        """
         if isinstance(value, int):
             return value
         second = 1000
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 9f3c57e6a1..55e4db5442 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -41,6 +41,10 @@ class FederationConfig(Config):
         )
         self.federation_metrics_domains = set(federation_metrics_domains)
 
+        self.allow_profile_lookup_over_federation = config.get(
+            "allow_profile_lookup_over_federation", True
+        )
+
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """\
         ## Federation ##
@@ -66,6 +70,12 @@ class FederationConfig(Config):
         #federation_metrics_domains:
         #  - matrix.org
         #  - example.com
+
+        # Uncomment to disable profile lookup over federation. By default, the
+        # Federation API allows other homeservers to obtain profile data of any user
+        # on this homeserver. Defaults to 'true'.
+        #
+        #allow_profile_lookup_over_federation: false
         """
 
 
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index def33a60ad..847d25122c 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -102,6 +102,16 @@ class RatelimitConfig(Config):
             defaults={"per_second": 0.01, "burst_count": 3},
         )
 
+        # Ratelimit cross-user key requests:
+        # * For local requests this is keyed by the sending device.
+        # * For requests received over federation this is keyed by the origin.
+        #
+        # Note that this isn't exposed in the configuration as it is obscure.
+        self.rc_key_requests = RateLimitConfig(
+            config.get("rc_key_requests", {}),
+            defaults={"per_second": 20, "burst_count": 100},
+        )
+
         self.rc_3pid_validation = RateLimitConfig(
             config.get("rc_3pid_validation") or {},
             defaults={"per_second": 0.003, "burst_count": 5},
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 6f3325ff81..0bfd4398e2 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -263,6 +263,12 @@ class ServerConfig(Config):
             False,
         )
 
+        # Whether to retrieve and display profile data for a user when they
+        # are invited to a room
+        self.include_profile_data_on_invite = config.get(
+            "include_profile_data_on_invite", True
+        )
+
         if "restrict_public_rooms_to_local_users" in config and (
             "allow_public_rooms_without_auth" in config
             or "allow_public_rooms_over_federation" in config
@@ -848,6 +854,14 @@ class ServerConfig(Config):
         #
         #limit_profile_requests_to_users_who_share_rooms: true
 
+        # Uncomment to prevent a user's profile data from being retrieved and
+        # displayed in a room until they have joined it. By default, a user's
+        # profile data is included in an invite event, regardless of the values
+        # of the above two settings, and whether or not the users share a server.
+        # Defaults to 'true'.
+        #
+        #include_profile_data_on_invite: false
+
         # If set to 'true', removes the need for authentication to access the server's
         # public rooms directory through the client API, meaning that anyone can
         # query the room directory. Defaults to 'false'.
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index c8d19c5d6b..8d05ef173c 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -24,32 +24,46 @@ class UserDirectoryConfig(Config):
     section = "userdirectory"
 
     def read_config(self, config, **kwargs):
-        self.user_directory_search_enabled = True
-        self.user_directory_search_all_users = False
-        user_directory_config = config.get("user_directory", None)
-        if user_directory_config:
-            self.user_directory_search_enabled = user_directory_config.get(
-                "enabled", True
-            )
-            self.user_directory_search_all_users = user_directory_config.get(
-                "search_all_users", False
-            )
+        user_directory_config = config.get("user_directory") or {}
+        self.user_directory_search_enabled = user_directory_config.get("enabled", True)
+        self.user_directory_search_all_users = user_directory_config.get(
+            "search_all_users", False
+        )
+        self.user_directory_search_prefer_local_users = user_directory_config.get(
+            "prefer_local_users", False
+        )
 
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """
         # User Directory configuration
         #
-        # 'enabled' defines whether users can search the user directory. If
-        # false then empty responses are returned to all queries. Defaults to
-        # true.
-        #
-        # 'search_all_users' defines whether to search all users visible to your HS
-        # when searching the user directory, rather than limiting to users visible
-        # in public rooms.  Defaults to false.  If you set it True, you'll have to
-        # rebuild the user_directory search indexes, see
-        # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
-        #
-        #user_directory:
-        #  enabled: true
-        #  search_all_users: false
+        user_directory:
+            # Defines whether users can search the user directory. If false then
+            # empty responses are returned to all queries. Defaults to true.
+            #
+            # Uncomment to disable the user directory.
+            #
+            #enabled: false
+
+            # Defines whether to search all users visible to your HS when searching
+            # the user directory, rather than limiting to users visible in public
+            # rooms. Defaults to false.
+            #
+            # If you set it true, you'll have to rebuild the user_directory search
+            # indexes, see:
+            # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
+            #
+            # Uncomment to return search results containing all known users, even if that
+            # user does not share a room with the requester.
+            #
+            #search_all_users: true
+
+            # Defines whether to prefer local users in search query results.
+            # If True, local users are more likely to appear above remote users
+            # when searching the user directory. Defaults to false.
+            #
+            # Uncomment to prefer local over remote users in user directory search
+            # results.
+            #
+            #prefer_local_users: true
         """
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 7fe837878d..93aa199119 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -34,7 +34,7 @@ from twisted.internet import defer
 from twisted.internet.abstract import isIPAddress
 from twisted.python import failure
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EduTypes, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -44,6 +44,7 @@ from synapse.api.errors import (
     SynapseError,
     UnsupportedRoomVersionError,
 )
+from synapse.api.ratelimiting import Ratelimiter
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
@@ -869,6 +870,13 @@ class FederationHandlerRegistry:
         # EDU received.
         self._edu_type_to_instance = {}  # type: Dict[str, List[str]]
 
+        # A rate limiter for incoming room key requests per origin.
+        self._room_key_request_rate_limiter = Ratelimiter(
+            clock=self.clock,
+            rate_hz=self.config.rc_key_requests.per_second,
+            burst_count=self.config.rc_key_requests.burst_count,
+        )
+
     def register_edu_handler(
         self, edu_type: str, handler: Callable[[str, JsonDict], Awaitable[None]]
     ):
@@ -917,7 +925,15 @@ class FederationHandlerRegistry:
         self._edu_type_to_instance[edu_type] = instance_names
 
     async def on_edu(self, edu_type: str, origin: str, content: dict):
-        if not self.config.use_presence and edu_type == "m.presence":
+        if not self.config.use_presence and edu_type == EduTypes.Presence:
+            return
+
+        # If the incoming room key requests from a particular origin are over
+        # the limit, drop them.
+        if (
+            edu_type == EduTypes.RoomKeyRequest
+            and not self._room_key_request_rate_limiter.can_do_action(origin)
+        ):
             return
 
         # Temporary patch to drop cross-user key share requests
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 97fc4d0a82..24ebc4b803 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -474,7 +474,7 @@ class FederationSender:
             self._processing_pending_presence = False
 
     def send_presence_to_destinations(
-        self, states: List[UserPresenceState], destinations: List[str]
+        self, states: Iterable[UserPresenceState], destinations: Iterable[str]
     ) -> None:
         """Send the given presence states to the given destinations.
         destinations (list[str])
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index cce83704d4..2cf935f38d 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -484,10 +484,9 @@ class FederationQueryServlet(BaseFederationServlet):
 
     # This is when we receive a server-server Query
     async def on_GET(self, origin, content, query, query_type):
-        return await self.handler.on_query_request(
-            query_type,
-            {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()},
-        )
+        args = {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()}
+        args["origin"] = origin
+        return await self.handler.on_query_request(query_type, args)
 
 
 class FederationMakeJoinServlet(BaseFederationServlet):
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 571a60b5f8..7db4f48965 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -16,7 +16,9 @@
 import logging
 from typing import TYPE_CHECKING, Any, Dict
 
+from synapse.api.constants import EduTypes
 from synapse.api.errors import SynapseError
+from synapse.api.ratelimiting import Ratelimiter
 from synapse.logging.context import run_in_background
 from synapse.logging.opentracing import (
     get_active_span_text_map,
@@ -25,7 +27,7 @@ from synapse.logging.opentracing import (
     start_active_span,
 )
 from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
-from synapse.types import JsonDict, UserID, get_domain_from_id
+from synapse.types import JsonDict, Requester, UserID, get_domain_from_id
 from synapse.util import json_encoder
 from synapse.util.stringutils import random_string
 
@@ -78,6 +80,12 @@ class DeviceMessageHandler:
                 ReplicationUserDevicesResyncRestServlet.make_client(hs)
             )
 
+        self._ratelimiter = Ratelimiter(
+            clock=hs.get_clock(),
+            rate_hz=hs.config.rc_key_requests.per_second,
+            burst_count=hs.config.rc_key_requests.burst_count,
+        )
+
     async def on_direct_to_device_edu(self, origin: str, content: JsonDict) -> None:
         local_messages = {}
         sender_user_id = content["sender"]
@@ -168,17 +176,25 @@ class DeviceMessageHandler:
 
     async def send_device_message(
         self,
-        sender_user_id: str,
+        requester: Requester,
         message_type: str,
         messages: Dict[str, Dict[str, JsonDict]],
     ) -> None:
+        sender_user_id = requester.user.to_string()
+
         set_tag("number_of_messages", len(messages))
         set_tag("sender", sender_user_id)
         local_messages = {}
         remote_messages = {}  # type: Dict[str, Dict[str, Dict[str, JsonDict]]]
         for user_id, by_device in messages.items():
-            # Temporary patch to disable sending local cross-user key requests.
-            if message_type == "m.room_key_request" and user_id != sender_user_id:
+            # Ratelimit local cross-user key requests by the sending device.
+            if (
+                message_type == EduTypes.RoomKeyRequest
+                and user_id != sender_user_id
+                and self._ratelimiter.can_do_action(
+                    (sender_user_id, requester.device_id)
+                )
+            ):
                 continue
 
             # we use UserID.from_string to catch invalid user ids
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index 3e23f82cf7..f46cab7325 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -17,7 +17,7 @@ import logging
 import random
 from typing import TYPE_CHECKING, Iterable, List, Optional
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EduTypes, EventTypes, Membership
 from synapse.api.errors import AuthError, SynapseError
 from synapse.events import EventBase
 from synapse.handlers.presence import format_user_presence_state
@@ -113,7 +113,7 @@ class EventStreamHandler(BaseHandler):
                     states = await presence_handler.get_states(users)
                     to_add.extend(
                         {
-                            "type": EventTypes.Presence,
+                            "type": EduTypes.Presence,
                             "content": format_user_presence_state(state, time_now),
                         }
                         for state in states
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 78c3e5a10b..71a5076672 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
 
 from twisted.internet import defer
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EduTypes, EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.events.validator import EventValidator
 from synapse.handlers.presence import format_user_presence_state
@@ -412,7 +412,7 @@ class InitialSyncHandler(BaseHandler):
 
             return [
                 {
-                    "type": EventTypes.Presence,
+                    "type": EduTypes.Presence,
                     "content": format_user_presence_state(s, time_now),
                 }
                 for s in states
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 2236595338..41ded62d21 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -387,6 +387,12 @@ class EventCreationHandler:
 
         self.room_invite_state_types = self.hs.config.room_invite_state_types
 
+        self.membership_types_to_include_profile_data_in = (
+            {Membership.JOIN, Membership.INVITE}
+            if self.hs.config.include_profile_data_on_invite
+            else {Membership.JOIN}
+        )
+
         self.send_event = ReplicationSendEventRestServlet.make_client(hs)
 
         # This is only used to get at ratelimit function, and maybe_kick_guest_users
@@ -500,7 +506,7 @@ class EventCreationHandler:
             membership = builder.content.get("membership", None)
             target = UserID.from_string(builder.state_key)
 
-            if membership in {Membership.JOIN, Membership.INVITE}:
+            if membership in self.membership_types_to_include_profile_data_in:
                 # If event doesn't include a display name, add one.
                 profile = self.profile_handler
                 content = builder.content
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index fb85b19770..b6a9ce4f38 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -849,6 +849,9 @@ class PresenceHandler(BasePresenceHandler):
         """Process current state deltas to find new joins that need to be
         handled.
         """
+        # A map of destination to a set of user state that they should receive
+        presence_destinations = {}  # type: Dict[str, Set[UserPresenceState]]
+
         for delta in deltas:
             typ = delta["type"]
             state_key = delta["state_key"]
@@ -858,6 +861,7 @@ class PresenceHandler(BasePresenceHandler):
 
             logger.debug("Handling: %r %r, %s", typ, state_key, event_id)
 
+            # Drop any event that isn't a membership join
             if typ != EventTypes.Member:
                 continue
 
@@ -880,13 +884,38 @@ class PresenceHandler(BasePresenceHandler):
                     # Ignore changes to join events.
                     continue
 
-            await self._on_user_joined_room(room_id, state_key)
+            # Retrieve any user presence state updates that need to be sent as a result,
+            # and the destinations that need to receive it
+            destinations, user_presence_states = await self._on_user_joined_room(
+                room_id, state_key
+            )
+
+            # Insert the destinations and respective updates into our destinations dict
+            for destination in destinations:
+                presence_destinations.setdefault(destination, set()).update(
+                    user_presence_states
+                )
+
+        # Send out user presence updates for each destination
+        for destination, user_state_set in presence_destinations.items():
+            self.federation.send_presence_to_destinations(
+                destinations=[destination], states=user_state_set
+            )
 
-    async def _on_user_joined_room(self, room_id: str, user_id: str) -> None:
+    async def _on_user_joined_room(
+        self, room_id: str, user_id: str
+    ) -> Tuple[List[str], List[UserPresenceState]]:
         """Called when we detect a user joining the room via the current state
-        delta stream.
-        """
+        delta stream. Returns the destinations that need to be updated and the
+        presence updates to send to them.
+
+        Args:
+            room_id: The ID of the room that the user has joined.
+            user_id: The ID of the user that has joined the room.
 
+        Returns:
+            A tuple of destinations and presence updates to send to them.
+        """
         if self.is_mine_id(user_id):
             # If this is a local user then we need to send their presence
             # out to hosts in the room (who don't already have it)
@@ -894,15 +923,15 @@ class PresenceHandler(BasePresenceHandler):
             # TODO: We should be able to filter the hosts down to those that
             # haven't previously seen the user
 
-            state = await self.current_state_for_user(user_id)
-            hosts = await self.state.get_current_hosts_in_room(room_id)
+            remote_hosts = await self.state.get_current_hosts_in_room(room_id)
 
             # Filter out ourselves.
-            hosts = {host for host in hosts if host != self.server_name}
+            filtered_remote_hosts = [
+                host for host in remote_hosts if host != self.server_name
+            ]
 
-            self.federation.send_presence_to_destinations(
-                states=[state], destinations=hosts
-            )
+            state = await self.current_state_for_user(user_id)
+            return filtered_remote_hosts, [state]
         else:
             # A remote user has joined the room, so we need to:
             #   1. Check if this is a new server in the room
@@ -915,6 +944,8 @@ class PresenceHandler(BasePresenceHandler):
             # TODO: Check that this is actually a new server joining the
             # room.
 
+            remote_host = get_domain_from_id(user_id)
+
             users = await self.state.get_current_users_in_room(room_id)
             user_ids = list(filter(self.is_mine_id, users))
 
@@ -934,10 +965,7 @@ class PresenceHandler(BasePresenceHandler):
                 or state.status_msg is not None
             ]
 
-            if states:
-                self.federation.send_presence_to_destinations(
-                    states=states, destinations=[get_domain_from_id(user_id)]
-                )
+            return [remote_host], states
 
 
 def should_notify(old_state, new_state):
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 2f62d84fb5..dd59392bda 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -310,6 +310,15 @@ class ProfileHandler(BaseHandler):
         await self._update_join_states(requester, target_user)
 
     async def on_profile_query(self, args: JsonDict) -> JsonDict:
+        """Handles federation profile query requests."""
+
+        if not self.hs.config.allow_profile_lookup_over_federation:
+            raise SynapseError(
+                403,
+                "Profile lookup over federation is disabled on this homeserver",
+                Codes.FORBIDDEN,
+            )
+
         user = UserID.from_string(args["user_id"])
         if not self.hs.is_mine(user):
             raise SynapseError(400, "User is not hosted on this homeserver")
diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py
index 7a0dbb5b1a..8af53b4f28 100644
--- a/synapse/replication/http/federation.py
+++ b/synapse/replication/http/federation.py
@@ -213,8 +213,9 @@ class ReplicationGetQueryRestServlet(ReplicationEndpoint):
             content = parse_json_object_from_request(request)
 
             args = content["args"]
+            args["origin"] = content["origin"]
 
-        logger.info("Got %r query", query_type)
+        logger.info("Got %r query from %s", query_type, args["origin"])
 
         result = await self.registry.on_query(query_type, args)
 
diff --git a/synapse/res/templates/sso_auth_account_details.html b/synapse/res/templates/sso_auth_account_details.html
index f4fdc40b22..00e1dcdbb8 100644
--- a/synapse/res/templates/sso_auth_account_details.html
+++ b/synapse/res/templates/sso_auth_account_details.html
@@ -145,7 +145,7 @@
         <input type="submit" value="Continue" class="primary-button">
         {% if user_attributes.avatar_url or user_attributes.display_name or user_attributes.emails %}
         <section class="idp-pick-details">
-          <h2><img src="{{ idp.idp_icon | mxc_to_http(24, 24) }}"/>Information from {{ idp.idp_name }}</h2>
+          <h2>{% if idp.idp_icon %}<img src="{{ idp.idp_icon | mxc_to_http(24, 24) }}"/>{% endif %}Information from {{ idp.idp_name }}</h2>
           {% if user_attributes.avatar_url %}
           <label class="idp-detail idp-avatar" for="idp-avatar">
             <div class="check-row">
diff --git a/synapse/rest/client/v2_alpha/sendtodevice.py b/synapse/rest/client/v2_alpha/sendtodevice.py
index a3dee14ed4..79c1b526ee 100644
--- a/synapse/rest/client/v2_alpha/sendtodevice.py
+++ b/synapse/rest/client/v2_alpha/sendtodevice.py
@@ -56,10 +56,8 @@ class SendToDeviceRestServlet(servlet.RestServlet):
         content = parse_json_object_from_request(request)
         assert_params_in_dict(content, ("messages",))
 
-        sender_user_id = requester.user.to_string()
-
         await self.device_message_handler.send_device_message(
-            sender_user_id, message_type, content["messages"]
+            requester, message_type, content["messages"]
         )
 
         response = (200, {})  # type: Tuple[int, dict]
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index a0162d4255..3375455c43 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -509,7 +509,7 @@ class MediaRepository:
         t_height: int,
         t_method: str,
         t_type: str,
-        url_cache: str,
+        url_cache: Optional[str],
     ) -> Optional[str]:
         input_path = await self.media_storage.ensure_media_is_in_local_cache(
             FileInfo(None, media_id, url_cache=url_cache)
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 1057e638be..b1b1c9e6ec 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -244,7 +244,7 @@ class MediaStorage:
                     await consumer.wait()
                 return local_path
 
-        raise Exception("file could not be found")
+        raise NotFoundError()
 
     def _file_info_to_path(self, file_info: FileInfo) -> str:
         """Converts file_info into a relative path.
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index d653a58be9..3ab90e9f9b 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -114,6 +114,7 @@ class ThumbnailResource(DirectServeJsonResource):
             m_type,
             thumbnail_infos,
             media_id,
+            media_id,
             url_cache=media_info["url_cache"],
             server_name=None,
         )
@@ -269,6 +270,7 @@ class ThumbnailResource(DirectServeJsonResource):
             method,
             m_type,
             thumbnail_infos,
+            media_id,
             media_info["filesystem_id"],
             url_cache=None,
             server_name=server_name,
@@ -282,6 +284,7 @@ class ThumbnailResource(DirectServeJsonResource):
         desired_method: str,
         desired_type: str,
         thumbnail_infos: List[Dict[str, Any]],
+        media_id: str,
         file_id: str,
         url_cache: Optional[str] = None,
         server_name: Optional[str] = None,
@@ -317,8 +320,59 @@ class ThumbnailResource(DirectServeJsonResource):
                 return
 
             responder = await self.media_storage.fetch_media(file_info)
+            if responder:
+                await respond_with_responder(
+                    request,
+                    responder,
+                    file_info.thumbnail_type,
+                    file_info.thumbnail_length,
+                )
+                return
+
+            # If we can't find the thumbnail we regenerate it. This can happen
+            # if e.g. we've deleted the thumbnails but still have the original
+            # image somewhere.
+            #
+            # Since we have an entry for the thumbnail in the DB we a) know we
+            # have have successfully generated the thumbnail in the past (so we
+            # don't need to worry about repeatedly failing to generate
+            # thumbnails), and b) have already calculated that appropriate
+            # width/height/method so we can just call the "generate exact"
+            # methods.
+
+            # First let's check that we do actually have the original image
+            # still. This will throw a 404 if we don't.
+            # TODO: We should refetch the thumbnails for remote media.
+            await self.media_storage.ensure_media_is_in_local_cache(
+                FileInfo(server_name, file_id, url_cache=url_cache)
+            )
+
+            if server_name:
+                await self.media_repo.generate_remote_exact_thumbnail(
+                    server_name,
+                    file_id=file_id,
+                    media_id=media_id,
+                    t_width=file_info.thumbnail_width,
+                    t_height=file_info.thumbnail_height,
+                    t_method=file_info.thumbnail_method,
+                    t_type=file_info.thumbnail_type,
+                )
+            else:
+                await self.media_repo.generate_local_exact_thumbnail(
+                    media_id=media_id,
+                    t_width=file_info.thumbnail_width,
+                    t_height=file_info.thumbnail_height,
+                    t_method=file_info.thumbnail_method,
+                    t_type=file_info.thumbnail_type,
+                    url_cache=url_cache,
+                )
+
+            responder = await self.media_storage.fetch_media(file_info)
             await respond_with_responder(
-                request, responder, file_info.thumbnail_type, file_info.thumbnail_length
+                request,
+                responder,
+                file_info.thumbnail_type,
+                file_info.thumbnail_length,
             )
         else:
             logger.info("Failed to find any generated thumbnails")
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index a0313c3ccf..9ee642c668 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -344,16 +344,16 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
         thumbnail_method,
         thumbnail_length,
     ):
-        await self.db_pool.simple_insert(
-            "local_media_repository_thumbnails",
-            {
+        await self.db_pool.simple_upsert(
+            table="local_media_repository_thumbnails",
+            keyvalues={
                 "media_id": media_id,
                 "thumbnail_width": thumbnail_width,
                 "thumbnail_height": thumbnail_height,
                 "thumbnail_method": thumbnail_method,
                 "thumbnail_type": thumbnail_type,
-                "thumbnail_length": thumbnail_length,
             },
+            values={"thumbnail_length": thumbnail_length},
             desc="store_local_thumbnail",
         )
 
@@ -498,18 +498,18 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
         thumbnail_method,
         thumbnail_length,
     ):
-        await self.db_pool.simple_insert(
-            "remote_media_cache_thumbnails",
-            {
+        await self.db_pool.simple_upsert(
+            table="remote_media_cache_thumbnails",
+            keyvalues={
                 "media_origin": origin,
                 "media_id": media_id,
                 "thumbnail_width": thumbnail_width,
                 "thumbnail_height": thumbnail_height,
                 "thumbnail_method": thumbnail_method,
                 "thumbnail_type": thumbnail_type,
-                "thumbnail_length": thumbnail_length,
-                "filesystem_id": filesystem_id,
             },
+            values={"thumbnail_length": thumbnail_length},
+            insertion_values={"filesystem_id": filesystem_id},
             desc="store_remote_media_thumbnail",
         )
 
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 63f88eac51..02ee15676c 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -556,6 +556,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
 
+        self._prefer_local_users_in_search = (
+            hs.config.user_directory_search_prefer_local_users
+        )
+        self._server_name = hs.config.server_name
+
     async def remove_from_user_dir(self, user_id: str) -> None:
         def _remove_from_user_dir_txn(txn):
             self.db_pool.simple_delete_txn(
@@ -754,9 +759,24 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 )
             """
 
+        # We allow manipulating the ranking algorithm by injecting statements
+        # based on config options.
+        additional_ordering_statements = []
+        ordering_arguments = ()
+
         if isinstance(self.database_engine, PostgresEngine):
             full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
 
+            # If enabled, this config option will rank local users higher than those on
+            # remote instances.
+            if self._prefer_local_users_in_search:
+                # This statement checks whether a given user's user ID contains a server name
+                # that matches the local server
+                statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
+                additional_ordering_statements.append(statement)
+
+                ordering_arguments += ("%:" + self._server_name,)
+
             # We order by rank and then if they have profile info
             # The ranking algorithm is hand tweaked for "best" results. Broadly
             # the idea is we give a higher weight to exact matches.
@@ -767,7 +787,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 FROM user_directory_search as t
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
-                    %s
+                    %(where_clause)s
                     AND vector @@ to_tsquery('simple', ?)
                 ORDER BY
                     (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
@@ -787,33 +807,54 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                             8
                         )
                     )
+                    %(order_case_statements)s
                     DESC,
                     display_name IS NULL,
                     avatar_url IS NULL
                 LIMIT ?
-            """ % (
-                where_clause,
+            """ % {
+                "where_clause": where_clause,
+                "order_case_statements": " ".join(additional_ordering_statements),
+            }
+            args = (
+                join_args
+                + (full_query, exact_query, prefix_query)
+                + ordering_arguments
+                + (limit + 1,)
             )
-            args = join_args + (full_query, exact_query, prefix_query, limit + 1)
         elif isinstance(self.database_engine, Sqlite3Engine):
             search_query = _parse_query_sqlite(search_term)
 
+            # If enabled, this config option will rank local users higher than those on
+            # remote instances.
+            if self._prefer_local_users_in_search:
+                # This statement checks whether a given user's user ID contains a server name
+                # that matches the local server
+                #
+                # Note that we need to include a comma at the end for valid SQL
+                statement = "user_id LIKE ? DESC,"
+                additional_ordering_statements.append(statement)
+
+                ordering_arguments += ("%:" + self._server_name,)
+
             sql = """
                 SELECT d.user_id AS user_id, display_name, avatar_url
                 FROM user_directory_search as t
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
-                    %s
+                    %(where_clause)s
                     AND value MATCH ?
                 ORDER BY
                     rank(matchinfo(user_directory_search)) DESC,
+                    %(order_statements)s
                     display_name IS NULL,
                     avatar_url IS NULL
                 LIMIT ?
-            """ % (
-                where_clause,
-            )
-            args = join_args + (search_query, limit + 1)
+            """ % {
+                "where_clause": where_clause,
+                "order_statements": " ".join(additional_ordering_statements),
+            }
+            args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index be2ee26f07..996c614198 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -521,7 +521,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(expected_state.state, PresenceState.ONLINE)
         self.federation_sender.send_presence_to_destinations.assert_called_once_with(
-            destinations=["server2"], states=[expected_state]
+            destinations=["server2"], states={expected_state}
         )
 
         #
@@ -533,7 +533,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
 
         self.federation_sender.send_presence.assert_not_called()
         self.federation_sender.send_presence_to_destinations.assert_called_once_with(
-            destinations=["server3"], states=[expected_state]
+            destinations=["server3"], states={expected_state}
         )
 
     def test_remote_gets_presence_when_local_user_joins(self):
@@ -584,8 +584,14 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
             self.presence_handler.current_state_for_user("@test2:server")
         )
         self.assertEqual(expected_state.state, PresenceState.ONLINE)
-        self.federation_sender.send_presence_to_destinations.assert_called_once_with(
-            destinations={"server2", "server3"}, states=[expected_state]
+        self.assertEqual(
+            self.federation_sender.send_presence_to_destinations.call_count, 2
+        )
+        self.federation_sender.send_presence_to_destinations.assert_any_call(
+            destinations=["server3"], states={expected_state}
+        )
+        self.federation_sender.send_presence_to_destinations.assert_any_call(
+            destinations=["server2"], states={expected_state}
         )
 
     def _add_new_user(self, room_id, user_id):
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index 18ca8b84f5..75c6a4e21c 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -161,7 +161,11 @@ class ProfileTestCase(unittest.HomeserverTestCase):
 
         response = self.get_success(
             self.query_handlers["profile"](
-                {"user_id": "@caroline:test", "field": "displayname"}
+                {
+                    "user_id": "@caroline:test",
+                    "field": "displayname",
+                    "origin": "servername.tld",
+                }
             )
         )
 
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 3572e54c5d..98b2f5b383 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -18,6 +18,7 @@ from twisted.internet import defer
 
 import synapse.rest.admin
 from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes
+from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.rest.client.v1 import login, room
 from synapse.rest.client.v2_alpha import user_directory
 from synapse.storage.roommember import ProfileInfo
@@ -46,6 +47,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
     def prepare(self, reactor, clock, hs):
         self.store = hs.get_datastore()
         self.handler = hs.get_user_directory_handler()
+        self.event_builder_factory = self.hs.get_event_builder_factory()
+        self.event_creation_handler = self.hs.get_event_creation_handler()
 
     def test_handle_local_profile_change_with_support_user(self):
         support_user_id = "@support:test"
@@ -547,6 +550,100 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         s = self.get_success(self.handler.search_users(u1, u4, 10))
         self.assertEqual(len(s["results"]), 1)
 
+    @override_config(
+        {
+            "user_directory": {
+                "enabled": True,
+                "search_all_users": True,
+                "prefer_local_users": True,
+            }
+        }
+    )
+    def test_prefer_local_users(self):
+        """Tests that local users are shown higher in search results when
+        user_directory.prefer_local_users is True.
+        """
+        # Create a room and few users to test the directory with
+        searching_user = self.register_user("searcher", "password")
+        searching_user_tok = self.login("searcher", "password")
+
+        room_id = self.helper.create_room_as(
+            searching_user,
+            room_version=RoomVersions.V1.identifier,
+            tok=searching_user_tok,
+        )
+
+        # Create a few local users and join them to the room
+        local_user_1 = self.register_user("user_xxxxx", "password")
+        local_user_2 = self.register_user("user_bbbbb", "password")
+        local_user_3 = self.register_user("user_zzzzz", "password")
+
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_1)
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_2)
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_3)
+
+        # Create a few "remote" users and join them to the room
+        remote_user_1 = "@user_aaaaa:remote_server"
+        remote_user_2 = "@user_yyyyy:remote_server"
+        remote_user_3 = "@user_ccccc:remote_server"
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1)
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2)
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3)
+
+        local_users = [local_user_1, local_user_2, local_user_3]
+        remote_users = [remote_user_1, remote_user_2, remote_user_3]
+
+        # Populate the user directory via background update
+        self._add_background_updates()
+        while not self.get_success(
+            self.store.db_pool.updates.has_completed_background_updates()
+        ):
+            self.get_success(
+                self.store.db_pool.updates.do_next_background_update(100), by=0.1
+            )
+
+        # The local searching user searches for the term "user", which other users have
+        # in their user id
+        results = self.get_success(
+            self.handler.search_users(searching_user, "user", 20)
+        )["results"]
+        received_user_id_ordering = [result["user_id"] for result in results]
+
+        # Typically we'd expect Synapse to return users in lexicographical order,
+        # assuming they have similar User IDs/display names, and profile information.
+
+        # Check that the order of returned results using our module is as we expect,
+        # i.e our local users show up first, despite all users having lexographically mixed
+        # user IDs.
+        [self.assertIn(user, local_users) for user in received_user_id_ordering[:3]]
+        [self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]]
+
+    def _add_user_to_room(
+        self,
+        room_id: str,
+        room_version: RoomVersion,
+        user_id: str,
+    ):
+        # Add a user to the room.
+        builder = self.event_builder_factory.for_room_version(
+            room_version,
+            {
+                "type": "m.room.member",
+                "sender": user_id,
+                "state_key": user_id,
+                "room_id": room_id,
+                "content": {"membership": "join"},
+            },
+        )
+
+        event, context = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder)
+        )
+
+        self.get_success(
+            self.hs.get_storage().persistence.persist_event(event, context)
+        )
+
 
 class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
     user_id = "@test:test"
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 0789b12392..36d1e6bc4a 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -231,9 +231,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
 
     def prepare(self, reactor, clock, hs):
 
-        self.media_repo = hs.get_media_repository_resource()
-        self.download_resource = self.media_repo.children[b"download"]
-        self.thumbnail_resource = self.media_repo.children[b"thumbnail"]
+        media_resource = hs.get_media_repository_resource()
+        self.download_resource = media_resource.children[b"download"]
+        self.thumbnail_resource = media_resource.children[b"thumbnail"]
+        self.store = hs.get_datastore()
+        self.media_repo = hs.get_media_repository()
 
         self.media_id = "example.com/12345"
 
@@ -357,6 +359,67 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         """
         self._test_thumbnail("scale", None, False)
 
+    def test_thumbnail_repeated_thumbnail(self):
+        """Test that fetching the same thumbnail works, and deleting the on disk
+        thumbnail regenerates it.
+        """
+        self._test_thumbnail(
+            "scale", self.test_image.expected_scaled, self.test_image.expected_found
+        )
+
+        if not self.test_image.expected_found:
+            return
+
+        # Fetching again should work, without re-requesting the image from the
+        # remote.
+        params = "?width=32&height=32&method=scale"
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.thumbnail_resource),
+            "GET",
+            self.media_id + params,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        self.assertEqual(channel.code, 200)
+        if self.test_image.expected_scaled:
+            self.assertEqual(
+                channel.result["body"],
+                self.test_image.expected_scaled,
+                channel.result["body"],
+            )
+
+        # Deleting the thumbnail on disk then re-requesting it should work as
+        # Synapse should regenerate missing thumbnails.
+        origin, media_id = self.media_id.split("/")
+        info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
+        file_id = info["filesystem_id"]
+
+        thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
+            origin, file_id
+        )
+        shutil.rmtree(thumbnail_dir, ignore_errors=True)
+
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.thumbnail_resource),
+            "GET",
+            self.media_id + params,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        self.assertEqual(channel.code, 200)
+        if self.test_image.expected_scaled:
+            self.assertEqual(
+                channel.result["body"],
+                self.test_image.expected_scaled,
+                channel.result["body"],
+            )
+
     def _test_thumbnail(self, method, expected_body, expected_found):
         params = "?width=32&height=32&method=" + method
         channel = make_request(