Diffstat (limited to 'synapse')
-rw-r--r--  synapse/__init__.py | 2
-rw-r--r--  synapse/api/urls.py | 2
-rw-r--r--  synapse/config/_base.py | 11
-rw-r--r--  synapse/config/emailconfig.py | 8
-rw-r--r--  synapse/config/oidc_config.py | 60
-rw-r--r--  synapse/config/registration.py | 21
-rw-r--r--  synapse/config/saml2_config.py | 2
-rw-r--r--  synapse/config/server.py | 26
-rw-r--r--  synapse/config/sso.py | 13
-rw-r--r--  synapse/federation/federation_server.py | 2
-rw-r--r--  synapse/federation/transport/server.py | 2
-rw-r--r--  synapse/handlers/cas_handler.py | 4
-rw-r--r--  synapse/handlers/identity.py | 2
-rw-r--r--  synapse/handlers/oidc_handler.py | 3
-rw-r--r--  synapse/handlers/room.py | 2
-rw-r--r--  synapse/handlers/saml_handler.py | 4
-rw-r--r--  synapse/handlers/sso.py | 5
-rw-r--r--  synapse/http/endpoint.py | 79
-rw-r--r--  synapse/python_dependencies.py | 4
-rw-r--r--  synapse/res/templates/sso_login_idp_picker.html | 3
-rw-r--r--  synapse/rest/client/v1/room.py | 3
-rw-r--r--  synapse/rest/well_known.py | 4
-rw-r--r--  synapse/server_notices/resource_limits_server_notices.py | 3
-rw-r--r--  synapse/server_notices/server_notices_manager.py | 3
-rw-r--r--  synapse/storage/databases/main/account_data.py | 2
-rw-r--r--  synapse/storage/databases/main/receipts.py | 4
-rw-r--r--  synapse/storage/databases/main/room.py | 6
-rw-r--r--  synapse/storage/databases/main/schema/delta/59/07shard_account_data_fix.sql | 18
-rw-r--r--  synapse/storage/util/id_generators.py | 6
-rw-r--r--  synapse/storage/util/sequence.py | 56
-rw-r--r--  synapse/types.py | 2
-rw-r--r--  synapse/util/iterutils.py | 2
-rw-r--r--  synapse/util/stringutils.py | 92
33 files changed, 288 insertions(+), 168 deletions(-)
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 193adca624..3cd682f9e7 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -48,7 +48,7 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.25.0"
+__version__ = "1.26.0rc2"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/api/urls.py b/synapse/api/urls.py
index 6379c86dde..e36aeef31f 100644
--- a/synapse/api/urls.py
+++ b/synapse/api/urls.py
@@ -42,8 +42,6 @@ class ConsentURIBuilder:
         """
         if hs_config.form_secret is None:
             raise ConfigError("form_secret not set in config")
-        if hs_config.public_baseurl is None:
-            raise ConfigError("public_baseurl not set in config")
 
         self._hmac_secret = hs_config.form_secret.encode("utf-8")
         self._public_baseurl = hs_config.public_baseurl
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 2931a88207..94144efc87 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -252,11 +252,12 @@ class Config:
         env = jinja2.Environment(loader=loader, autoescape=autoescape)
 
         # Update the environment with our custom filters
-        env.filters.update({"format_ts": _format_ts_filter})
-        if self.public_baseurl:
-            env.filters.update(
-                {"mxc_to_http": _create_mxc_to_http_filter(self.public_baseurl)}
-            )
+        env.filters.update(
+            {
+                "format_ts": _format_ts_filter,
+                "mxc_to_http": _create_mxc_to_http_filter(self.public_baseurl),
+            }
+        )
 
         for filename in filenames:
             # Load the template
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index d4328c46b9..6a487afd34 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -166,11 +166,6 @@ class EmailConfig(Config):
             if not self.email_notif_from:
                 missing.append("email.notif_from")
 
-            # public_baseurl is required to build password reset and validation links that
-            # will be emailed to users
-            if config.get("public_baseurl") is None:
-                missing.append("public_baseurl")
-
             if missing:
                 raise ConfigError(
                     MISSING_PASSWORD_RESET_CONFIG_ERROR % (", ".join(missing),)
@@ -269,9 +264,6 @@ class EmailConfig(Config):
             if not self.email_notif_from:
                 missing.append("email.notif_from")
 
-            if config.get("public_baseurl") is None:
-                missing.append("public_baseurl")
-
             if missing:
                 raise ConfigError(
                     "email.enable_notifs is True but required keys are missing: %s"
diff --git a/synapse/config/oidc_config.py b/synapse/config/oidc_config.py
index 80a24cfbc9..d58a83be7f 100644
--- a/synapse/config/oidc_config.py
+++ b/synapse/config/oidc_config.py
@@ -23,6 +23,7 @@ from synapse.config._util import validate_config
 from synapse.python_dependencies import DependencyException, check_requirements
 from synapse.types import Collection, JsonDict
 from synapse.util.module_loader import load_module
+from synapse.util.stringutils import parse_and_validate_mxc_uri
 
 from ._base import Config, ConfigError
 
@@ -43,8 +44,6 @@ class OIDCConfig(Config):
             raise ConfigError(e.message) from e
 
         public_baseurl = self.public_baseurl
-        if public_baseurl is None:
-            raise ConfigError("oidc_config requires a public_baseurl to be set")
         self.oidc_callback_url = public_baseurl + "_synapse/oidc/callback"
 
     @property
@@ -68,6 +67,12 @@ class OIDCConfig(Config):
         #   idp_name: A user-facing name for this identity provider, which is used to
         #       offer the user a choice of login mechanisms.
         #
+        #   idp_icon: An optional icon for this identity provider, which is presented
+        #       by identity picker pages. If given, must be an MXC URI of the format
+        #       mxc://<server-name>/<media-id>. (An easy way to obtain such an MXC URI
+        #       is to upload an image to an (unencrypted) room and then copy the "url"
+        #       from the source of the event.)
+        #
         #   discover: set to 'false' to disable the use of the OIDC discovery mechanism
         #       to discover endpoints. Defaults to true.
         #
@@ -152,13 +157,16 @@ class OIDCConfig(Config):
         #
         # For backwards compatibility, it is also possible to configure a single OIDC
         # provider via an 'oidc_config' setting. This is now deprecated and admins are
-        # advised to migrate to the 'oidc_providers' format.
+        # advised to migrate to the 'oidc_providers' format. (When doing that migration,
+        # use 'oidc' for the idp_id to ensure that existing users continue to be
+        # recognised.)
         #
         oidc_providers:
           # Generic example
           #
           #- idp_id: my_idp
           #  idp_name: "My OpenID provider"
+          #  idp_icon: "mxc://example.com/mediaid"
           #  discover: false
           #  issuer: "https://accounts.example.com/"
           #  client_id: "provided-by-your-issuer"
@@ -182,8 +190,8 @@ class OIDCConfig(Config):
 
           # For use with Github
           #
-          #- idp_id: google
-          #  idp_name: Google
+          #- idp_id: github
+          #  idp_name: Github
           #  discover: false
           #  issuer: "https://github.com/"
           #  client_id: "your-client-id" # TO BE FILLED
@@ -207,8 +215,11 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
     "type": "object",
     "required": ["issuer", "client_id", "client_secret"],
     "properties": {
+        # TODO: fix the maxLength here depending on what MSC2528 decides
+        #   remember that we prefix the ID given here with `oidc-`
         "idp_id": {"type": "string", "minLength": 1, "maxLength": 128},
         "idp_name": {"type": "string"},
+        "idp_icon": {"type": "string"},
         "discover": {"type": "boolean"},
         "issuer": {"type": "string"},
         "client_id": {"type": "string"},
@@ -327,20 +338,50 @@ def _parse_oidc_config_dict(
             config_path + ("user_mapping_provider", "module"),
         )
 
-    # MSC2858 will appy certain limits in what can be used as an IdP id, so let's
+    # MSC2858 will apply certain limits in what can be used as an IdP id, so let's
     # enforce those limits now.
+    # TODO: factor out this stuff to a generic function
     idp_id = oidc_config.get("idp_id", "oidc")
-    valid_idp_chars = set(string.ascii_letters + string.digits + "-._~")
+
+    # TODO: update this validity check based on what MSC2858 decides.
+    valid_idp_chars = set(string.ascii_lowercase + string.digits + "-._")
 
     if any(c not in valid_idp_chars for c in idp_id):
         raise ConfigError(
-            'idp_id may only contain A-Z, a-z, 0-9, "-", ".", "_", "~"',
+            'idp_id may only contain a-z, 0-9, "-", ".", "_"',
             config_path + ("idp_id",),
         )
 
+    if idp_id[0] not in string.ascii_lowercase:
+        raise ConfigError(
+            "idp_id must start with a-z", config_path + ("idp_id",),
+        )
+
+    # prefix the given IDP with a prefix specific to the SSO mechanism, to avoid
+    # clashes with other mechs (such as SAML, CAS).
+    #
+    # We allow "oidc" as an exception so that people migrating from old-style
+    # "oidc_config" format (which has long used "oidc" as its idp_id) can migrate to
+    # a new-style "oidc_providers" entry without changing the idp_id for their provider
+    # (and thereby invalidating their user_external_ids data).
+
+    if idp_id != "oidc":
+        idp_id = "oidc-" + idp_id
+
+    # MSC2858 also specifies that the idp_icon must be a valid MXC uri
+    idp_icon = oidc_config.get("idp_icon")
+    if idp_icon is not None:
+        try:
+            parse_and_validate_mxc_uri(idp_icon)
+        except ValueError as e:
+            raise ConfigError(
+                "idp_icon must be a valid MXC URI", config_path + ("idp_icon",)
+            ) from e
+
     return OidcProviderConfig(
         idp_id=idp_id,
         idp_name=oidc_config.get("idp_name", "OIDC"),
+        idp_icon=idp_icon,
         discover=oidc_config.get("discover", True),
         issuer=oidc_config["issuer"],
         client_id=oidc_config["client_id"],
@@ -368,6 +409,9 @@ class OidcProviderConfig:
     # user-facing name for this identity provider.
     idp_name = attr.ib(type=str)
 
+    # Optional MXC URI for icon for this IdP.
+    idp_icon = attr.ib(type=Optional[str])
+
     # whether the OIDC discovery mechanism is used to discover endpoints
     discover = attr.ib(type=bool)
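
To make the idp_id rules above concrete, here is a minimal standalone sketch of the validation and prefixing this hunk performs (the function name is illustrative, not Synapse's):

    import string

    def normalise_idp_id(idp_id: str = "oidc") -> str:
        # Character set restricted per MSC2858, as in the hunk above.
        valid_chars = set(string.ascii_lowercase + string.digits + "-._")
        if any(c not in valid_chars for c in idp_id):
            raise ValueError('idp_id may only contain a-z, 0-9, "-", ".", "_"')
        if idp_id[0] not in string.ascii_lowercase:
            raise ValueError("idp_id must start with a-z")
        # The legacy "oidc" ID is left unprefixed so existing user_external_ids
        # entries keep matching; everything else gains an "oidc-" prefix.
        return idp_id if idp_id == "oidc" else "oidc-" + idp_id

    # normalise_idp_id("github") -> "oidc-github"; normalise_idp_id("oidc") -> "oidc"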
 
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index 740c3fc1b1..4bfc69cb7a 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -49,10 +49,6 @@ class AccountValidityConfig(Config):
 
             self.startup_job_max_delta = self.period * 10.0 / 100.0
 
-        if self.renew_by_email_enabled:
-            if "public_baseurl" not in synapse_config:
-                raise ConfigError("Can't send renewal emails without 'public_baseurl'")
-
         template_dir = config.get("template_dir")
 
         if not template_dir:
@@ -109,13 +105,6 @@ class RegistrationConfig(Config):
         account_threepid_delegates = config.get("account_threepid_delegates") or {}
         self.account_threepid_delegate_email = account_threepid_delegates.get("email")
         self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn")
-        if self.account_threepid_delegate_msisdn and not self.public_baseurl:
-            raise ConfigError(
-                "The configuration option `public_baseurl` is required if "
-                "`account_threepid_delegate.msisdn` is set, such that "
-                "clients know where to submit validation tokens to. Please "
-                "configure `public_baseurl`."
-            )
 
         self.default_identity_server = config.get("default_identity_server")
         self.allow_guest_access = config.get("allow_guest_access", False)
@@ -240,8 +229,9 @@ class RegistrationConfig(Config):
           # send an email to the account's email address with a renewal link. By
           # default, no such emails are sent.
           #
-          # If you enable this setting, you will also need to fill out the 'email' and
-          # 'public_baseurl' configuration sections.
+          # If you enable this setting, you will also need to fill out the 'email'
+          # configuration section. You should also check that 'public_baseurl' is set
+          # correctly.
           #
           #renew_at: 1w
 
@@ -332,8 +322,7 @@ class RegistrationConfig(Config):
         # The identity server which we suggest that clients should use when users log
         # in on this server.
         #
-        # (By default, no suggestion is made, so it is left up to the client.
-        # This setting is ignored unless public_baseurl is also set.)
+        # (By default, no suggestion is made, so it is left up to the client.)
         #
         #default_identity_server: https://matrix.org
 
@@ -358,8 +347,6 @@ class RegistrationConfig(Config):
         # by the Matrix Identity Service API specification:
         # https://matrix.org/docs/spec/identity_service/latest
         #
-        # If a delegate is specified, the config option public_baseurl must also be filled out.
-        #
         account_threepid_delegates:
             #email: https://example.com     # Delegate email sending to example.com
             #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 7b97d4f114..f33dfa0d6a 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -189,8 +189,6 @@ class SAML2Config(Config):
         import saml2
 
         public_baseurl = self.public_baseurl
-        if public_baseurl is None:
-            raise ConfigError("saml2_config requires a public_baseurl to be set")
 
         if self.saml2_grandfathered_mxid_source_attribute:
             optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute)
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 7242a4aa8e..47a0370173 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -26,7 +26,7 @@ import yaml
 from netaddr import IPSet
 
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
-from synapse.http.endpoint import parse_and_validate_server_name
+from synapse.util.stringutils import parse_and_validate_server_name
 
 from ._base import Config, ConfigError
 
@@ -161,7 +161,11 @@ class ServerConfig(Config):
         self.print_pidfile = config.get("print_pidfile")
         self.user_agent_suffix = config.get("user_agent_suffix")
         self.use_frozen_dicts = config.get("use_frozen_dicts", False)
-        self.public_baseurl = config.get("public_baseurl")
+        self.public_baseurl = config.get("public_baseurl") or "https://%s/" % (
+            self.server_name,
+        )
+        if self.public_baseurl[-1] != "/":
+            self.public_baseurl += "/"
 
         # Whether to enable user presence.
         self.use_presence = config.get("use_presence", True)
@@ -317,9 +321,6 @@ class ServerConfig(Config):
         # Always blacklist 0.0.0.0, ::
         self.federation_ip_range_blacklist.update(["0.0.0.0", "::"])
 
-        if self.public_baseurl is not None:
-            if self.public_baseurl[-1] != "/":
-                self.public_baseurl += "/"
         self.start_pushers = config.get("start_pushers", True)
 
         # (undocumented) option for torturing the worker-mode replication a bit,
@@ -740,11 +741,16 @@ class ServerConfig(Config):
         #
         #web_client_location: https://riot.example.com/
 
-        # The public-facing base URL that clients use to access this HS
-        # (not including _matrix/...). This is the same URL a user would
-        # enter into the 'custom HS URL' field on their client. If you
-        # use synapse with a reverse proxy, this should be the URL to reach
-        # synapse via the proxy.
+        # The public-facing base URL that clients use to access this Homeserver (not
+        # including _matrix/...). This is the same URL a user might enter into the
+        # 'Custom Homeserver URL' field on their client. If you use Synapse with a
+        # reverse proxy, this should be the URL to reach Synapse via the proxy.
+        # Otherwise, it should be the URL to reach Synapse's client HTTP listener (see
+        # 'listeners' below).
+        #
+        # If this is left unset, it defaults to 'https://<server_name>/'. (Note that
+        # that will not work unless you configure Synapse or a reverse-proxy to listen
+        # on port 443.)
         #
         #public_baseurl: https://example.com/
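
A minimal sketch of the new public_baseurl behaviour introduced above (defaulting plus trailing-slash normalisation); the helper name is illustrative:

    from typing import Optional

    def resolve_public_baseurl(configured: Optional[str], server_name: str) -> str:
        # Fall back to https://<server_name>/ when unset, and always end with a
        # trailing slash so callers can append paths like "_matrix/..." directly.
        baseurl = configured or "https://%s/" % (server_name,)
        if baseurl[-1] != "/":
            baseurl += "/"
        return baseurl

    # resolve_public_baseurl(None, "example.com") -> "https://example.com/"
    # resolve_public_baseurl("https://matrix.example.com", "example.com") -> "https://matrix.example.com/"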
 
diff --git a/synapse/config/sso.py b/synapse/config/sso.py
index 366f0d4698..59be825532 100644
--- a/synapse/config/sso.py
+++ b/synapse/config/sso.py
@@ -64,11 +64,8 @@ class SSOConfig(Config):
         # gracefully to the client). This would make it pointless to ask the user for
         # confirmation, since the URL the confirmation page would be showing wouldn't be
         # the client's.
-        # public_baseurl is an optional setting, so we only add the fallback's URL to the
-        # list if it's provided (because we can't figure out what that URL is otherwise).
-        if self.public_baseurl:
-            login_fallback_url = self.public_baseurl + "_matrix/static/client/login"
-            self.sso_client_whitelist.append(login_fallback_url)
+        login_fallback_url = self.public_baseurl + "_matrix/static/client/login"
+        self.sso_client_whitelist.append(login_fallback_url)
 
     def generate_config_section(self, **kwargs):
         return """\
@@ -86,9 +83,9 @@ class SSOConfig(Config):
             # phishing attacks from evil.site. To avoid this, include a slash after the
             # hostname: "https://my.client/".
             #
-            # If public_baseurl is set, then the login fallback page (used by clients
-            # that don't natively support the required login flows) is whitelisted in
-            # addition to any URLs in this list.
+            # The login fallback page (used by clients that don't natively support the
+            # required login flows) is automatically whitelisted in addition to any URLs
+            # in this list.
             #
             # By default, this list is empty.
             #
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index e5339aca23..171d25c945 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -49,7 +49,6 @@ from synapse.events import EventBase
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
 from synapse.federation.persistence import TransactionActions
 from synapse.federation.units import Edu, Transaction
-from synapse.http.endpoint import parse_server_name
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
     make_deferred_yieldable,
@@ -66,6 +65,7 @@ from synapse.types import JsonDict, get_domain_from_id
 from synapse.util import glob_to_regex, json_decoder, unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.caches.response_cache import ResponseCache
+from synapse.util.stringutils import parse_server_name
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index cfd094e58f..95c64510a9 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -28,7 +28,6 @@ from synapse.api.urls import (
     FEDERATION_V1_PREFIX,
     FEDERATION_V2_PREFIX,
 )
-from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.http.server import JsonResource
 from synapse.http.servlet import (
     parse_boolean_from_args,
@@ -45,6 +44,7 @@ from synapse.logging.opentracing import (
 )
 from synapse.server import HomeServer
 from synapse.types import ThirdPartyInstanceID, get_domain_from_id
+from synapse.util.stringutils import parse_and_validate_server_name
 from synapse.util.versionstring import get_version_string
 
 logger = logging.getLogger(__name__)
diff --git a/synapse/handlers/cas_handler.py b/synapse/handlers/cas_handler.py
index f3430c6713..0f342c607b 100644
--- a/synapse/handlers/cas_handler.py
+++ b/synapse/handlers/cas_handler.py
@@ -80,6 +80,10 @@ class CasHandler:
         # user-facing name of this auth provider
         self.idp_name = "CAS"
 
+        # we do not currently support icons for CAS auth, but this is required by
+        # the SsoIdentityProvider protocol type.
+        self.idp_icon = None
+
         self._sso_handler = hs.get_sso_handler()
 
         self._sso_handler.register_identity_provider(self)
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index c05036ad1f..f61844d688 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -476,8 +476,6 @@ class IdentityHandler(BaseHandler):
         except RequestTimedOutError:
             raise SynapseError(500, "Timed out contacting identity server")
 
-        assert self.hs.config.public_baseurl
-
         # we need to tell the client to send the token back to us, since it doesn't
         # otherwise know where to send it, so add submit_url response parameter
         # (see also MSC2078)
diff --git a/synapse/handlers/oidc_handler.py b/synapse/handlers/oidc_handler.py
index ba686d74b2..1607e12935 100644
--- a/synapse/handlers/oidc_handler.py
+++ b/synapse/handlers/oidc_handler.py
@@ -271,6 +271,9 @@ class OidcProvider:
         # user-facing name of this auth provider
         self.idp_name = provider.idp_name
 
+        # MXC URI for icon for this auth provider
+        self.idp_icon = provider.idp_icon
+
         self._sso_handler = hs.get_sso_handler()
 
         self._sso_handler.register_identity_provider(self)
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 3bece6d668..ee27d99135 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -38,7 +38,6 @@ from synapse.api.filtering import Filter
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.events import EventBase
 from synapse.events.utils import copy_power_levels_contents
-from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.storage.state import StateFilter
 from synapse.types import (
     JsonDict,
@@ -55,6 +54,7 @@ from synapse.types import (
 from synapse.util import stringutils
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.response_cache import ResponseCache
+from synapse.util.stringutils import parse_and_validate_server_name
 from synapse.visibility import filter_events_for_client
 
 from ._base import BaseHandler
diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index a8376543c9..38461cf79d 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -78,6 +78,10 @@ class SamlHandler(BaseHandler):
         # user-facing name of this auth provider
         self.idp_name = "SAML"
 
+        # we do not currently support icons for SAML auth, but this is required by
+        # the SsoIdentityProvider protocol type.
+        self.idp_icon = None
+
         # a map from saml session id to Saml2SessionData object
         self._outstanding_requests_dict = {}  # type: Dict[str, Saml2SessionData]
 
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index dcc85e9871..d493327a10 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -75,6 +75,11 @@ class SsoIdentityProvider(Protocol):
     def idp_name(self) -> str:
         """User-facing name for this provider"""
 
+    @property
+    def idp_icon(self) -> Optional[str]:
+        """Optional MXC URI for user-facing icon"""
+        return None
+
     @abc.abstractmethod
     async def handle_redirect_request(
         self,
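
For reference, an illustrative object satisfying the attributes the SsoIdentityProvider protocol now expects (this is not Synapse code; the redirect handler's parameters are elided here, as they are in the hunk above):

    class ExampleIdentityProvider:
        # Unique ID; OIDC providers arrive here already "oidc-"-prefixed.
        idp_id = "oidc-example"
        # User-facing name shown on the IdP picker page.
        idp_name = "Example OpenID provider"
        # Optional MXC URI for an icon; CAS and SAML set this to None for now.
        idp_icon = "mxc://example.com/mediaid"

        async def handle_redirect_request(self, *args, **kwargs):
            raise NotImplementedError  # signature elided; see the protocol definition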
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
deleted file mode 100644
index 92a5b606c8..0000000000
--- a/synapse/http/endpoint.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2014-2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-import re
-
-logger = logging.getLogger(__name__)
-
-
-def parse_server_name(server_name):
-    """Split a server name into host/port parts.
-
-    Args:
-        server_name (str): server name to parse
-
-    Returns:
-        Tuple[str, int|None]: host/port parts.
-
-    Raises:
-        ValueError if the server name could not be parsed.
-    """
-    try:
-        if server_name[-1] == "]":
-            # ipv6 literal, hopefully
-            return server_name, None
-
-        domain_port = server_name.rsplit(":", 1)
-        domain = domain_port[0]
-        port = int(domain_port[1]) if domain_port[1:] else None
-        return domain, port
-    except Exception:
-        raise ValueError("Invalid server name '%s'" % server_name)
-
-
-VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z")
-
-
-def parse_and_validate_server_name(server_name):
-    """Split a server name into host/port parts and do some basic validation.
-
-    Args:
-        server_name (str): server name to parse
-
-    Returns:
-        Tuple[str, int|None]: host/port parts.
-
-    Raises:
-        ValueError if the server name could not be parsed.
-    """
-    host, port = parse_server_name(server_name)
-
-    # these tests don't need to be bulletproof as we'll find out soon enough
-    # if somebody is giving us invalid data. What we *do* need is to be sure
-    # that nobody is sneaking IP literals in that look like hostnames, etc.
-
-    # look for ipv6 literals
-    if host[0] == "[":
-        if host[-1] != "]":
-            raise ValueError("Mismatched [...] in server name '%s'" % (server_name,))
-        return host, port
-
-    # otherwise it should only be alphanumerics.
-    if not VALID_HOST_REGEX.match(host):
-        raise ValueError(
-            "Server name '%s' contains invalid characters" % (server_name,)
-        )
-
-    return host, port
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index c97e0df1f5..bfd46a3730 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -86,8 +86,8 @@ REQUIREMENTS = [
 
 CONDITIONAL_REQUIREMENTS = {
     "matrix-synapse-ldap3": ["matrix-synapse-ldap3>=0.1"],
-    # we use execute_batch, which arrived in psycopg 2.7.
-    "postgres": ["psycopg2>=2.7"],
+    # we use execute_values with the fetch param, which arrived in psycopg 2.8.
+    "postgres": ["psycopg2>=2.8"],
     # ACME support is required to provision TLS certificates from authorities
     # that use the protocol, such as Let's Encrypt.
     "acme": [
diff --git a/synapse/res/templates/sso_login_idp_picker.html b/synapse/res/templates/sso_login_idp_picker.html
index f53c9cd679..5b38481012 100644
--- a/synapse/res/templates/sso_login_idp_picker.html
+++ b/synapse/res/templates/sso_login_idp_picker.html
@@ -17,6 +17,9 @@
                     <li>
                         <input type="radio" name="idp" id="prov{{loop.index}}" value="{{p.idp_id}}">
                         <label for="prov{{loop.index}}">{{p.idp_name | e}}</label>
+{% if p.idp_icon %}
+                        <img src="{{p.idp_icon | mxc_to_http(32, 32)}}"/>
+{% endif %}
                     </li>
 {% endfor %}
                 </ul>
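
The template leans on the mxc_to_http Jinja filter, which the _base.py change above now registers unconditionally. A rough sketch of what such a filter does, assuming the standard media-repository thumbnail endpoint (the real implementation lives in synapse/config/_base.py and may differ in detail):

    from urllib.parse import quote

    def mxc_to_http(public_baseurl: str, mxc: str, width: int, height: int) -> str:
        # "mxc://<server-name>/<media-id>" -> thumbnail URL on the media repo.
        assert mxc.startswith("mxc://")
        server_and_media_id = mxc[len("mxc://"):]
        return "%s_matrix/media/r0/thumbnail/%s?width=%d&height=%d" % (
            public_baseurl, quote(server_and_media_id, safe="/"), width, height,
        )

    # mxc_to_http("https://example.com/", "mxc://example.com/mediaid", 32, 32)
    # -> "https://example.com/_matrix/media/r0/thumbnail/example.com/mediaid?width=32&height=32"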
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index e6725b03b0..f95627ee61 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -32,7 +32,6 @@ from synapse.api.errors import (
 )
 from synapse.api.filtering import Filter
 from synapse.events.utils import format_event_for_client_v2
-from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.http.servlet import (
     RestServlet,
     assert_params_in_dict,
@@ -47,7 +46,7 @@ from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID
 from synapse.util import json_decoder
-from synapse.util.stringutils import random_string
+from synapse.util.stringutils import parse_and_validate_server_name, random_string
 
 if TYPE_CHECKING:
     import synapse.server
diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py
index f591cc6c5c..241fe746d9 100644
--- a/synapse/rest/well_known.py
+++ b/synapse/rest/well_known.py
@@ -34,10 +34,6 @@ class WellKnownBuilder:
         self._config = hs.config
 
     def get_well_known(self):
-        # if we don't have a public_baseurl, we can't help much here.
-        if self._config.public_baseurl is None:
-            return None
-
         result = {"m.homeserver": {"base_url": self._config.public_baseurl}}
 
         if self._config.default_identity_server:
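
With public_baseurl now always defined, get_well_known() always returns a body. Assuming the spec's standard keys, the served /.well-known/matrix/client document looks roughly like this ("m.identity_server" only appears when default_identity_server is configured):

    well_known = {
        "m.homeserver": {"base_url": "https://example.com/"},
        "m.identity_server": {"base_url": "https://matrix.org"},
    }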
diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py
index 2258d306d9..8dd01fce76 100644
--- a/synapse/server_notices/resource_limits_server_notices.py
+++ b/synapse/server_notices/resource_limits_server_notices.py
@@ -42,6 +42,7 @@ class ResourceLimitsServerNotices:
         self._auth = hs.get_auth()
         self._config = hs.config
         self._resouce_limited = False
+        self._account_data_handler = hs.get_account_data_handler()
         self._message_handler = hs.get_message_handler()
         self._state = hs.get_state_handler()
 
@@ -177,7 +178,7 @@ class ResourceLimitsServerNotices:
                 # tag already present, nothing to do here
                 need_to_set_tag = False
         if need_to_set_tag:
-            max_id = await self._store.add_tag_to_room(
+            max_id = await self._account_data_handler.add_tag_to_room(
                 user_id, room_id, SERVER_NOTICE_ROOM_TAG, {}
             )
             self._notifier.on_new_event("account_data_key", max_id, users=[user_id])
diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py
index 100dbd5e2c..c46b2f047d 100644
--- a/synapse/server_notices/server_notices_manager.py
+++ b/synapse/server_notices/server_notices_manager.py
@@ -35,6 +35,7 @@ class ServerNoticesManager:
 
         self._store = hs.get_datastore()
         self._config = hs.config
+        self._account_data_handler = hs.get_account_data_handler()
         self._room_creation_handler = hs.get_room_creation_handler()
         self._room_member_handler = hs.get_room_member_handler()
         self._event_creation_handler = hs.get_event_creation_handler()
@@ -163,7 +164,7 @@ class ServerNoticesManager:
         )
         room_id = info["room_id"]
 
-        max_id = await self._store.add_tag_to_room(
+        max_id = await self._account_data_handler.add_tag_to_room(
             user_id, room_id, SERVER_NOTICE_ROOM_TAG, {}
         )
         self._notifier.on_new_event("account_data_key", max_id, users=[user_id])
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 68896f34af..a277a1ef13 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -68,7 +68,7 @@ class AccountDataWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.events:
+            if hs.get_instance_name() in hs.config.worker.writers.account_data:
                 self._account_data_id_gen = StreamIdGenerator(
                     db_conn,
                     "room_account_data",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index e0e57f0578..e4843a202c 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -45,7 +45,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             self._receipts_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
-                stream_name="account_data",
+                stream_name="receipts",
                 instance_name=self._instance_name,
                 tables=[("receipts_linearized", "instance_name", "stream_id")],
                 sequence_name="receipts_sequence",
@@ -61,7 +61,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.events:
+            if hs.get_instance_name() in hs.config.worker.writers.receipts:
                 self._receipts_id_gen = StreamIdGenerator(
                     db_conn, "receipts_linearized", "stream_id"
                 )
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 284f2ce77c..a9fcb5f59c 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -16,7 +16,6 @@
 
 import collections
 import logging
-import re
 from abc import abstractmethod
 from enum import Enum
 from typing import Any, Dict, List, Optional, Tuple
@@ -30,6 +29,7 @@ from synapse.storage.databases.main.search import SearchStore
 from synapse.types import JsonDict, ThirdPartyInstanceID
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
+from synapse.util.stringutils import MXC_REGEX
 
 logger = logging.getLogger(__name__)
 
@@ -660,8 +660,6 @@ class RoomWorkerStore(SQLBaseStore):
             The local and remote media as a lists of tuples where the key is
             the hostname and the value is the media ID.
         """
-        mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")
-
         sql = """
             SELECT stream_ordering, json FROM events
             JOIN event_json USING (room_id, event_id)
@@ -688,7 +686,7 @@ class RoomWorkerStore(SQLBaseStore):
                 for url in (content_url, thumbnail_url):
                     if not url:
                         continue
-                    matches = mxc_re.match(url)
+                    matches = MXC_REGEX.match(url)
                     if matches:
                         hostname = matches.group(1)
                         media_id = matches.group(2)
diff --git a/synapse/storage/databases/main/schema/delta/59/07shard_account_data_fix.sql b/synapse/storage/databases/main/schema/delta/59/07shard_account_data_fix.sql
new file mode 100644
index 0000000000..9f2b5ebc5a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/59/07shard_account_data_fix.sql
@@ -0,0 +1,18 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We incorrectly populated these, so we delete them and let the
+-- MultiWriterIdGenerator repopulate it.
+DELETE FROM stream_positions WHERE stream_name = 'receipts' OR stream_name = 'account_data';
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 39a3ab1162..bb84c0d792 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -261,7 +261,11 @@ class MultiWriterIdGenerator:
         # We check that the table and sequence haven't diverged.
         for table, _, id_column in tables:
             self._sequence_gen.check_consistency(
-                db_conn, table=table, id_column=id_column, positive=positive
+                db_conn,
+                table=table,
+                id_column=id_column,
+                stream_name=stream_name,
+                positive=positive,
             )
 
         # This goes and fills out the above state from the database.
diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py
index 412df6b8ef..c780ade077 100644
--- a/synapse/storage/util/sequence.py
+++ b/synapse/storage/util/sequence.py
@@ -45,6 +45,21 @@ and run the following SQL:
 See docs/postgres.md for more information.
 """
 
+_INCONSISTENT_STREAM_ERROR = """
+Postgres sequence '%(seq)s' is inconsistent with associated stream position
+of '%(stream_name)s' in the 'stream_positions' table.
+
+This is likely a programming error and should be reported at
+https://github.com/matrix-org/synapse.
+
+A temporary workaround to fix this error is to shut down Synapse (including
+any and all workers) and run the following SQL:
+
+    DELETE FROM stream_positions WHERE stream_name = '%(stream_name)s';
+
+This will need to be done every time the server is restarted.
+"""
+
 
 class SequenceGenerator(metaclass=abc.ABCMeta):
     """A class which generates a unique sequence of integers"""
@@ -60,14 +75,20 @@ class SequenceGenerator(metaclass=abc.ABCMeta):
         db_conn: "LoggingDatabaseConnection",
         table: str,
         id_column: str,
+        stream_name: Optional[str] = None,
         positive: bool = True,
     ):
         """Should be called during start up to test that the current value of
         the sequence is greater than or equal to the maximum ID in the table.
 
-        This is to handle various cases where the sequence value can get out
-        of sync with the table, e.g. if Synapse gets rolled back to a previous
+        This is to handle various cases where the sequence value can get out of
+        sync with the table, e.g. if Synapse gets rolled back to a previous
         version and the rolled forwards again.
+
+        If a stream name is given then this will check that any value in the
+        `stream_positions` table is less than or equal to the current sequence
+        value. If it isn't then it's likely that streams have been crossed
+        somewhere (e.g. two ID generators have the same stream name).
         """
         ...
 
@@ -93,8 +114,12 @@ class PostgresSequenceGenerator(SequenceGenerator):
         db_conn: "LoggingDatabaseConnection",
         table: str,
         id_column: str,
+        stream_name: Optional[str] = None,
         positive: bool = True,
     ):
+        """See SequenceGenerator.check_consistency for docstring.
+        """
+
         txn = db_conn.cursor(txn_name="sequence.check_consistency")
 
         # First we get the current max ID from the table.
@@ -118,6 +143,18 @@ class PostgresSequenceGenerator(SequenceGenerator):
             "SELECT last_value, is_called FROM %(seq)s" % {"seq": self._sequence_name}
         )
         last_value, is_called = txn.fetchone()
+
+        # If we have an associated stream check the stream_positions table.
+        max_in_stream_positions = None
+        if stream_name:
+            txn.execute(
+                "SELECT MAX(stream_id) FROM stream_positions WHERE stream_name = ?",
+                (stream_name,),
+            )
+            row = txn.fetchone()
+            if row:
+                max_in_stream_positions = row[0]
+
         txn.close()
 
         # If `is_called` is False then `last_value` is actually the value that
@@ -138,6 +175,14 @@ class PostgresSequenceGenerator(SequenceGenerator):
                 % {"seq": self._sequence_name, "table": table, "max_id_sql": table_sql}
             )
 
+        # If we have values in the stream positions table then they have to be
+        # less than or equal to `last_value`
+        if max_in_stream_positions and max_in_stream_positions > last_value:
+            raise IncorrectDatabaseSetup(
+                _INCONSISTENT_STREAM_ERROR
+                % {"seq": self._sequence_name, "stream_name": stream_name}
+            )
+
 
 GetFirstCallbackType = Callable[[Cursor], int]
 
@@ -175,7 +220,12 @@ class LocalSequenceGenerator(SequenceGenerator):
             return self._current_max_id
 
     def check_consistency(
-        self, db_conn: Connection, table: str, id_column: str, positive: bool = True
+        self,
+        db_conn: Connection,
+        table: str,
+        id_column: str,
+        stream_name: Optional[str] = None,
+        positive: bool = True,
     ):
         # There is nothing to do for in memory sequences
         pass
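
The essence of the new stream_positions check, as a standalone sketch (it raises a generic error here rather than Synapse's IncorrectDatabaseSetup; the query placeholder follows the hunk above):

    from typing import Any

    def check_stream_positions(
        txn: Any, sequence_name: str, stream_name: str, last_value: int
    ) -> None:
        # Any persisted position for this stream must be <= the sequence's
        # current value; a larger value suggests two ID generators are sharing
        # a stream name (i.e. streams have been crossed).
        txn.execute(
            "SELECT MAX(stream_id) FROM stream_positions WHERE stream_name = ?",
            (stream_name,),
        )
        row = txn.fetchone()
        max_in_stream_positions = row[0] if row else None
        if max_in_stream_positions and max_in_stream_positions > last_value:
            raise RuntimeError(
                "sequence %r is inconsistent with stream %r"
                % (sequence_name, stream_name)
            )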
diff --git a/synapse/types.py b/synapse/types.py
index 20a43d05bf..eafe729dfe 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -37,7 +37,7 @@ from signedjson.key import decode_verify_key_bytes
 from unpaddedbase64 import decode_base64
 
 from synapse.api.errors import Codes, SynapseError
-from synapse.http.endpoint import parse_and_validate_server_name
+from synapse.util.stringutils import parse_and_validate_server_name
 
 if TYPE_CHECKING:
     from synapse.appservice.api import ApplicationService
diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py
index 6ef2b008a4..8d2411513f 100644
--- a/synapse/util/iterutils.py
+++ b/synapse/util/iterutils.py
@@ -78,7 +78,7 @@ def sorted_topologically(
         if node not in degree_map:
             continue
 
-        for edge in edges:
+        for edge in set(edges):
             if edge in degree_map:
                 degree_map[node] += 1
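
Why the set(edges) matters: with this kind of in-degree counting, a duplicated edge inflates a node's count, so the node can never be drained to zero and is silently dropped from the output. A minimal illustration (assuming the Kahn-style algorithm the surrounding code suggests):

    # "b" depends on "a", but "a" is listed twice.
    graph = {"a": [], "b": ["a", "a"]}

    # Without deduplication, "b" would be counted with in-degree 2 yet only
    # decremented once when "a" is yielded, so it would never be emitted.
    # With set(edges) the in-degree is 1 and the sort yields ["a", "b"]:
    #
    #     assert list(sorted_topologically(["a", "b"], graph)) == ["a", "b"]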
 
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index b103c8694c..f8038bf861 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -18,6 +18,7 @@ import random
 import re
 import string
 from collections.abc import Iterable
+from typing import Optional, Tuple
 
 from synapse.api.errors import Codes, SynapseError
 
@@ -26,6 +27,15 @@ _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@"
 # https://matrix.org/docs/spec/client_server/r0.6.0#post-matrix-client-r0-register-email-requesttoken
 client_secret_regex = re.compile(r"^[0-9a-zA-Z\.\=\_\-]+$")
 
+# https://matrix.org/docs/spec/client_server/r0.6.1#matrix-content-mxc-uris,
+# together with https://github.com/matrix-org/matrix-doc/issues/2177 which basically
+# says "there is no grammar for media ids"
+#
+# The server_name part of this is purposely lax: use parse_and_validate_mxc for
+# additional validation.
+#
+MXC_REGEX = re.compile("^mxc://([^/]+)/([^/#?]+)$")
+
 # random_string and random_string_with_symbols are used for a range of things,
 # some cryptographically important, some less so. We use SystemRandom to make sure
 # we get cryptographically-secure randoms.
@@ -59,6 +69,88 @@ def assert_valid_client_secret(client_secret):
         )
 
 
+def parse_server_name(server_name: str) -> Tuple[str, Optional[int]]:
+    """Split a server name into host/port parts.
+
+    Args:
+        server_name: server name to parse
+
+    Returns:
+        host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    try:
+        if server_name[-1] == "]":
+            # ipv6 literal, hopefully
+            return server_name, None
+
+        domain_port = server_name.rsplit(":", 1)
+        domain = domain_port[0]
+        port = int(domain_port[1]) if domain_port[1:] else None
+        return domain, port
+    except Exception:
+        raise ValueError("Invalid server name '%s'" % server_name)
+
+
+VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z")
+
+
+def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]]:
+    """Split a server name into host/port parts and do some basic validation.
+
+    Args:
+        server_name: server name to parse
+
+    Returns:
+        host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    host, port = parse_server_name(server_name)
+
+    # these tests don't need to be bulletproof as we'll find out soon enough
+    # if somebody is giving us invalid data. What we *do* need is to be sure
+    # that nobody is sneaking IP literals in that look like hostnames, etc.
+
+    # look for ipv6 literals
+    if host[0] == "[":
+        if host[-1] != "]":
+            raise ValueError("Mismatched [...] in server name '%s'" % (server_name,))
+        return host, port
+
+    # otherwise it should only be alphanumerics.
+    if not VALID_HOST_REGEX.match(host):
+        raise ValueError(
+            "Server name '%s' contains invalid characters" % (server_name,)
+        )
+
+    return host, port
+
+
+def parse_and_validate_mxc_uri(mxc: str) -> Tuple[str, Optional[int], str]:
+    """Parse the given string as an MXC URI
+
+    Checks that the "server name" part is a valid server name
+
+    Args:
+        mxc: the (alleged) MXC URI to be checked
+    Returns:
+        hostname, port, media id
+    Raises:
+        ValueError if the URI cannot be parsed
+    """
+    m = MXC_REGEX.match(mxc)
+    if not m:
+        raise ValueError("mxc URI %r did not match expected format" % (mxc,))
+    server_name = m.group(1)
+    media_id = m.group(2)
+    host, port = parse_and_validate_server_name(server_name)
+    return host, port, media_id
+
+
 def shortstr(iterable: Iterable, maxitems: int = 5) -> str:
     """If iterable has maxitems or fewer, return the stringification of a list
     containing those items.
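
Usage of the server-name and MXC helpers now living in synapse.util.stringutils, following the definitions above:

    from synapse.util.stringutils import (
        parse_and_validate_mxc_uri,
        parse_and_validate_server_name,
        parse_server_name,
    )

    parse_server_name("example.com:8448")                # ("example.com", 8448)
    parse_server_name("[2001:db8::1]")                   # ("[2001:db8::1]", None)
    parse_and_validate_mxc_uri("mxc://example.com/abc")  # ("example.com", None, "abc")
    # parse_and_validate_server_name("bad host!") raises ValueError (invalid characters)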