summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/6037.feature1
-rw-r--r--docs/sample_config.yaml136
-rw-r--r--synapse/config/saml2_config.py191
-rw-r--r--synapse/handlers/saml_handler.py105
-rw-r--r--synapse/rest/client/v1/login.py14
-rw-r--r--synapse/storage/registration.py41
-rw-r--r--synapse/storage/schema/delta/56/user_external_ids.sql24
7 files changed, 400 insertions, 112 deletions
diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature
new file mode 100644
index 0000000000..95d82bd4d8
--- /dev/null
+++ b/changelog.d/6037.feature
@@ -0,0 +1 @@
+Handle userid clashes when authenticating via SAML by appending an integer suffix.
\ No newline at end of file
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 1ee0ba8c30..81f74b83ff 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1101,12 +1101,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 
 # Enable SAML2 for registration and login. Uses pysaml2.
 #
-# `sp_config` is the configuration for the pysaml2 Service Provider.
-# See pysaml2 docs for format of config.
+# At least one of `sp_config` or `config_path` must be set in this section to
+# enable SAML login.
 #
-# Default values will be used for the 'entityid' and 'service' settings,
-# so it is not normally necessary to specify them unless you need to
-# override them.
+# (You will probably also want to set the following options to `false` to
+# disable the regular login/registration flows:
+#   * enable_registration
+#   * password_config.enabled
 #
 # Once SAML support is enabled, a metadata file will be exposed at
 # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to
@@ -1114,52 +1115,85 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 # the IdP to use an ACS location of
 # https://<server>:<port>/_matrix/saml2/authn_response.
 #
-#saml2_config:
-#  sp_config:
-#    # point this to the IdP's metadata. You can use either a local file or
-#    # (preferably) a URL.
-#    metadata:
-#      #local: ["saml2/idp.xml"]
-#      remote:
-#        - url: https://our_idp/metadata.xml
-#
-#    # By default, the user has to go to our login page first. If you'd like to
-#    # allow IdP-initiated login, set 'allow_unsolicited: True' in a
-#    # 'service.sp' section:
-#    #
-#    #service:
-#    #  sp:
-#    #    allow_unsolicited: True
-#
-#    # The examples below are just used to generate our metadata xml, and you
-#    # may well not need it, depending on your setup. Alternatively you
-#    # may need a whole lot more detail - see the pysaml2 docs!
-#
-#    description: ["My awesome SP", "en"]
-#    name: ["Test SP", "en"]
-#
-#    organization:
-#      name: Example com
-#      display_name:
-#        - ["Example co", "en"]
-#      url: "http://example.com"
-#
-#    contact_person:
-#      - given_name: Bob
-#        sur_name: "the Sysadmin"
-#        email_address": ["admin@example.com"]
-#        contact_type": technical
-#
-#  # Instead of putting the config inline as above, you can specify a
-#  # separate pysaml2 configuration file:
-#  #
-#  config_path: "CONFDIR/sp_conf.py"
-#
-#  # the lifetime of a SAML session. This defines how long a user has to
-#  # complete the authentication process, if allow_unsolicited is unset.
-#  # The default is 5 minutes.
-#  #
-#  # saml_session_lifetime: 5m
+saml2_config:
+  # `sp_config` is the configuration for the pysaml2 Service Provider.
+  # See pysaml2 docs for format of config.
+  #
+  # Default values will be used for the 'entityid' and 'service' settings,
+  # so it is not normally necessary to specify them unless you need to
+  # override them.
+  #
+  #sp_config:
+  #  # point this to the IdP's metadata. You can use either a local file or
+  #  # (preferably) a URL.
+  #  metadata:
+  #    #local: ["saml2/idp.xml"]
+  #    remote:
+  #      - url: https://our_idp/metadata.xml
+  #
+  #    # By default, the user has to go to our login page first. If you'd like
+  #    # to allow IdP-initiated login, set 'allow_unsolicited: True' in a
+  #    # 'service.sp' section:
+  #    #
+  #    #service:
+  #    #  sp:
+  #    #    allow_unsolicited: true
+  #
+  #    # The examples below are just used to generate our metadata xml, and you
+  #    # may well not need them, depending on your setup. Alternatively you
+  #    # may need a whole lot more detail - see the pysaml2 docs!
+  #
+  #    description: ["My awesome SP", "en"]
+  #    name: ["Test SP", "en"]
+  #
+  #    organization:
+  #      name: Example com
+  #      display_name:
+  #        - ["Example co", "en"]
+  #      url: "http://example.com"
+  #
+  #    contact_person:
+  #      - given_name: Bob
+  #        sur_name: "the Sysadmin"
+  #        email_address": ["admin@example.com"]
+  #        contact_type": technical
+
+  # Instead of putting the config inline as above, you can specify a
+  # separate pysaml2 configuration file:
+  #
+  #config_path: "CONFDIR/sp_conf.py"
+
+  # the lifetime of a SAML session. This defines how long a user has to
+  # complete the authentication process, if allow_unsolicited is unset.
+  # The default is 5 minutes.
+  #
+  #saml_session_lifetime: 5m
+
+  # The SAML attribute (after mapping via the attribute maps) to use to derive
+  # the Matrix ID from. 'uid' by default.
+  #
+  #mxid_source_attribute: displayName
+
+  # The mapping system to use for mapping the saml attribute onto a matrix ID.
+  # Options include:
+  #  * 'hexencode' (which maps unpermitted characters to '=xx')
+  #  * 'dotreplace' (which replaces unpermitted characters with '.').
+  # The default is 'hexencode'.
+  #
+  #mxid_mapping: dotreplace
+
+  # In previous versions of synapse, the mapping from SAML attribute to MXID was
+  # always calculated dynamically rather than stored in a table. For backwards-
+  # compatibility, we will look for user_ids matching such a pattern before
+  # creating a new account.
+  #
+  # This setting controls the SAML attribute which will be used for this
+  # backwards-compatibility lookup. Typically it should be 'uid', but if the
+  # attribute maps are changed, it may be necessary to change it.
+  #
+  # The default is 'uid'.
+  #
+  #grandfathered_mxid_source_attribute: upn
 
 
 
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 6a8161547a..a022470702 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -12,7 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+
 from synapse.python_dependencies import DependencyException, check_requirements
+from synapse.types import (
+    map_username_to_mxid_localpart,
+    mxid_localpart_allowed_characters,
+)
 
 from ._base import Config, ConfigError
 
@@ -26,6 +32,9 @@ class SAML2Config(Config):
         if not saml2_config or not saml2_config.get("enabled", True):
             return
 
+        if not saml2_config.get("sp_config") and not saml2_config.get("config_path"):
+            return
+
         try:
             check_requirements("saml2")
         except DependencyException as e:
@@ -33,6 +42,14 @@ class SAML2Config(Config):
 
         self.saml2_enabled = True
 
+        self.saml2_mxid_source_attribute = saml2_config.get(
+            "mxid_source_attribute", "uid"
+        )
+
+        self.saml2_grandfathered_mxid_source_attribute = saml2_config.get(
+            "grandfathered_mxid_source_attribute", "uid"
+        )
+
         import saml2.config
 
         self.saml2_sp_config = saml2.config.SPConfig()
@@ -48,6 +65,12 @@ class SAML2Config(Config):
             saml2_config.get("saml_session_lifetime", "5m")
         )
 
+        mapping = saml2_config.get("mxid_mapping", "hexencode")
+        try:
+            self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping]
+        except KeyError:
+            raise ConfigError("%s is not a known mxid_mapping" % (mapping,))
+
     def _default_saml_config_dict(self):
         import saml2
 
@@ -55,6 +78,13 @@ class SAML2Config(Config):
         if public_baseurl is None:
             raise ConfigError("saml2_config requires a public_baseurl to be set")
 
+        required_attributes = {"uid", self.saml2_mxid_source_attribute}
+
+        optional_attributes = {"displayName"}
+        if self.saml2_grandfathered_mxid_source_attribute:
+            optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute)
+        optional_attributes -= required_attributes
+
         metadata_url = public_baseurl + "_matrix/saml2/metadata.xml"
         response_url = public_baseurl + "_matrix/saml2/authn_response"
         return {
@@ -66,8 +96,9 @@ class SAML2Config(Config):
                             (response_url, saml2.BINDING_HTTP_POST)
                         ]
                     },
-                    "required_attributes": ["uid"],
-                    "optional_attributes": ["mail", "surname", "givenname"],
+                    "required_attributes": list(required_attributes),
+                    "optional_attributes": list(optional_attributes),
+                    # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT,
                 }
             },
         }
@@ -76,12 +107,13 @@ class SAML2Config(Config):
         return """\
         # Enable SAML2 for registration and login. Uses pysaml2.
         #
-        # `sp_config` is the configuration for the pysaml2 Service Provider.
-        # See pysaml2 docs for format of config.
+        # At least one of `sp_config` or `config_path` must be set in this section to
+        # enable SAML login.
         #
-        # Default values will be used for the 'entityid' and 'service' settings,
-        # so it is not normally necessary to specify them unless you need to
-        # override them.
+        # (You will probably also want to set the following options to `false` to
+        # disable the regular login/registration flows:
+        #   * enable_registration
+        #   * password_config.enabled
         #
         # Once SAML support is enabled, a metadata file will be exposed at
         # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to
@@ -89,52 +121,105 @@ class SAML2Config(Config):
         # the IdP to use an ACS location of
         # https://<server>:<port>/_matrix/saml2/authn_response.
         #
-        #saml2_config:
-        #  sp_config:
-        #    # point this to the IdP's metadata. You can use either a local file or
-        #    # (preferably) a URL.
-        #    metadata:
-        #      #local: ["saml2/idp.xml"]
-        #      remote:
-        #        - url: https://our_idp/metadata.xml
-        #
-        #    # By default, the user has to go to our login page first. If you'd like to
-        #    # allow IdP-initiated login, set 'allow_unsolicited: True' in a
-        #    # 'service.sp' section:
-        #    #
-        #    #service:
-        #    #  sp:
-        #    #    allow_unsolicited: True
-        #
-        #    # The examples below are just used to generate our metadata xml, and you
-        #    # may well not need it, depending on your setup. Alternatively you
-        #    # may need a whole lot more detail - see the pysaml2 docs!
-        #
-        #    description: ["My awesome SP", "en"]
-        #    name: ["Test SP", "en"]
-        #
-        #    organization:
-        #      name: Example com
-        #      display_name:
-        #        - ["Example co", "en"]
-        #      url: "http://example.com"
-        #
-        #    contact_person:
-        #      - given_name: Bob
-        #        sur_name: "the Sysadmin"
-        #        email_address": ["admin@example.com"]
-        #        contact_type": technical
-        #
-        #  # Instead of putting the config inline as above, you can specify a
-        #  # separate pysaml2 configuration file:
-        #  #
-        #  config_path: "%(config_dir_path)s/sp_conf.py"
-        #
-        #  # the lifetime of a SAML session. This defines how long a user has to
-        #  # complete the authentication process, if allow_unsolicited is unset.
-        #  # The default is 5 minutes.
-        #  #
-        #  # saml_session_lifetime: 5m
+        saml2_config:
+          # `sp_config` is the configuration for the pysaml2 Service Provider.
+          # See pysaml2 docs for format of config.
+          #
+          # Default values will be used for the 'entityid' and 'service' settings,
+          # so it is not normally necessary to specify them unless you need to
+          # override them.
+          #
+          #sp_config:
+          #  # point this to the IdP's metadata. You can use either a local file or
+          #  # (preferably) a URL.
+          #  metadata:
+          #    #local: ["saml2/idp.xml"]
+          #    remote:
+          #      - url: https://our_idp/metadata.xml
+          #
+          #    # By default, the user has to go to our login page first. If you'd like
+          #    # to allow IdP-initiated login, set 'allow_unsolicited: True' in a
+          #    # 'service.sp' section:
+          #    #
+          #    #service:
+          #    #  sp:
+          #    #    allow_unsolicited: true
+          #
+          #    # The examples below are just used to generate our metadata xml, and you
+          #    # may well not need them, depending on your setup. Alternatively you
+          #    # may need a whole lot more detail - see the pysaml2 docs!
+          #
+          #    description: ["My awesome SP", "en"]
+          #    name: ["Test SP", "en"]
+          #
+          #    organization:
+          #      name: Example com
+          #      display_name:
+          #        - ["Example co", "en"]
+          #      url: "http://example.com"
+          #
+          #    contact_person:
+          #      - given_name: Bob
+          #        sur_name: "the Sysadmin"
+          #        email_address": ["admin@example.com"]
+          #        contact_type": technical
+
+          # Instead of putting the config inline as above, you can specify a
+          # separate pysaml2 configuration file:
+          #
+          #config_path: "%(config_dir_path)s/sp_conf.py"
+
+          # the lifetime of a SAML session. This defines how long a user has to
+          # complete the authentication process, if allow_unsolicited is unset.
+          # The default is 5 minutes.
+          #
+          #saml_session_lifetime: 5m
+
+          # The SAML attribute (after mapping via the attribute maps) to use to derive
+          # the Matrix ID from. 'uid' by default.
+          #
+          #mxid_source_attribute: displayName
+
+          # The mapping system to use for mapping the saml attribute onto a matrix ID.
+          # Options include:
+          #  * 'hexencode' (which maps unpermitted characters to '=xx')
+          #  * 'dotreplace' (which replaces unpermitted characters with '.').
+          # The default is 'hexencode'.
+          #
+          #mxid_mapping: dotreplace
+
+          # In previous versions of synapse, the mapping from SAML attribute to MXID was
+          # always calculated dynamically rather than stored in a table. For backwards-
+          # compatibility, we will look for user_ids matching such a pattern before
+          # creating a new account.
+          #
+          # This setting controls the SAML attribute which will be used for this
+          # backwards-compatibility lookup. Typically it should be 'uid', but if the
+          # attribute maps are changed, it may be necessary to change it.
+          #
+          # The default is 'uid'.
+          #
+          #grandfathered_mxid_source_attribute: upn
         """ % {
             "config_dir_path": config_dir_path
         }
+
+
+DOT_REPLACE_PATTERN = re.compile(
+    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),))
+)
+
+
+def dot_replace_for_mxid(username: str) -> str:
+    username = username.lower()
+    username = DOT_REPLACE_PATTERN.sub(".", username)
+
+    # regular mxids aren't allowed to start with an underscore either
+    username = re.sub("^_", "", username)
+    return username
+
+
+MXID_MAPPER_MAP = {
+    "hexencode": map_username_to_mxid_localpart,
+    "dotreplace": dot_replace_for_mxid,
+}
diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index a1ce6929cf..f000d2a00f 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -21,6 +21,8 @@ from saml2.client import Saml2Client
 from synapse.api.errors import SynapseError
 from synapse.http.servlet import parse_string
 from synapse.rest.client.v1.login import SSOAuthHandler
+from synapse.types import UserID, map_username_to_mxid_localpart
+from synapse.util.async_helpers import Linearizer
 
 logger = logging.getLogger(__name__)
 
@@ -29,12 +31,26 @@ class SamlHandler:
     def __init__(self, hs):
         self._saml_client = Saml2Client(hs.config.saml2_sp_config)
         self._sso_auth_handler = SSOAuthHandler(hs)
+        self._registration_handler = hs.get_registration_handler()
+
+        self._clock = hs.get_clock()
+        self._datastore = hs.get_datastore()
+        self._hostname = hs.hostname
+        self._saml2_session_lifetime = hs.config.saml2_session_lifetime
+        self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute
+        self._grandfathered_mxid_source_attribute = (
+            hs.config.saml2_grandfathered_mxid_source_attribute
+        )
+        self._mxid_mapper = hs.config.saml2_mxid_mapper
+
+        # identifier for the external_ids table
+        self._auth_provider_id = "saml"
 
         # a map from saml session id to Saml2SessionData object
         self._outstanding_requests_dict = {}
 
-        self._clock = hs.get_clock()
-        self._saml2_session_lifetime = hs.config.saml2_session_lifetime
+        # a lock on the mappings
+        self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock)
 
     def handle_redirect_request(self, client_redirect_url):
         """Handle an incoming request to /login/sso/redirect
@@ -60,7 +76,7 @@ class SamlHandler:
         # this shouldn't happen!
         raise Exception("prepare_for_authenticate didn't return a Location header")
 
-    def handle_saml_response(self, request):
+    async def handle_saml_response(self, request):
         """Handle an incoming request to /_matrix/saml2/authn_response
 
         Args:
@@ -77,6 +93,10 @@ class SamlHandler:
         # the dict.
         self.expire_sessions()
 
+        user_id = await self._map_saml_response_to_user(resp_bytes)
+        self._sso_auth_handler.complete_sso_login(user_id, request, relay_state)
+
+    async def _map_saml_response_to_user(self, resp_bytes):
         try:
             saml2_auth = self._saml_client.parse_authn_request_response(
                 resp_bytes,
@@ -91,18 +111,87 @@ class SamlHandler:
             logger.warning("SAML2 response was not signed")
             raise SynapseError(400, "SAML2 response was not signed")
 
-        if "uid" not in saml2_auth.ava:
+        logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava)
+
+        try:
+            remote_user_id = saml2_auth.ava["uid"][0]
+        except KeyError:
             logger.warning("SAML2 response lacks a 'uid' attestation")
             raise SynapseError(400, "uid not in SAML2 response")
 
+        try:
+            mxid_source = saml2_auth.ava[self._mxid_source_attribute][0]
+        except KeyError:
+            logger.warning(
+                "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute
+            )
+            raise SynapseError(
+                400, "%s not in SAML2 response" % (self._mxid_source_attribute,)
+            )
+
         self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None)
 
-        username = saml2_auth.ava["uid"][0]
         displayName = saml2_auth.ava.get("displayName", [None])[0]
 
-        return self._sso_auth_handler.on_successful_auth(
-            username, request, relay_state, user_display_name=displayName
-        )
+        with (await self._mapping_lock.queue(self._auth_provider_id)):
+            # first of all, check if we already have a mapping for this user
+            logger.info(
+                "Looking for existing mapping for user %s:%s",
+                self._auth_provider_id,
+                remote_user_id,
+            )
+            registered_user_id = await self._datastore.get_user_by_external_id(
+                self._auth_provider_id, remote_user_id
+            )
+            if registered_user_id is not None:
+                logger.info("Found existing mapping %s", registered_user_id)
+                return registered_user_id
+
+            # backwards-compatibility hack: see if there is an existing user with a
+            # suitable mapping from the uid
+            if (
+                self._grandfathered_mxid_source_attribute
+                and self._grandfathered_mxid_source_attribute in saml2_auth.ava
+            ):
+                attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0]
+                user_id = UserID(
+                    map_username_to_mxid_localpart(attrval), self._hostname
+                ).to_string()
+                logger.info(
+                    "Looking for existing account based on mapped %s %s",
+                    self._grandfathered_mxid_source_attribute,
+                    user_id,
+                )
+
+                users = await self._datastore.get_users_by_id_case_insensitive(user_id)
+                if users:
+                    registered_user_id = list(users.keys())[0]
+                    logger.info("Grandfathering mapping to %s", registered_user_id)
+                    await self._datastore.record_user_external_id(
+                        self._auth_provider_id, remote_user_id, registered_user_id
+                    )
+                    return registered_user_id
+
+            # figure out a new mxid for this user
+            base_mxid_localpart = self._mxid_mapper(mxid_source)
+
+            suffix = 0
+            while True:
+                localpart = base_mxid_localpart + (str(suffix) if suffix else "")
+                if not await self._datastore.get_users_by_id_case_insensitive(
+                    UserID(localpart, self._hostname).to_string()
+                ):
+                    break
+                suffix += 1
+            logger.info("Allocating mxid for new user with localpart %s", localpart)
+
+            registered_user_id = await self._registration_handler.register_user(
+                localpart=localpart, default_display_name=displayName
+            )
+            await self._datastore.record_user_external_id(
+                self._auth_provider_id, remote_user_id, registered_user_id
+            )
+            return registered_user_id
 
     def expire_sessions(self):
         expire_before = self._clock.time_msec() - self._saml2_session_lifetime
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 25a1b67092..9cddbc752a 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -29,6 +29,7 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.http.site import SynapseRequest
 from synapse.rest.client.v2_alpha._base import client_patterns
 from synapse.rest.well_known import WellKnownBuilder
 from synapse.types import UserID, map_username_to_mxid_localpart
@@ -507,6 +508,19 @@ class SSOAuthHandler(object):
                 localpart=localpart, default_display_name=user_display_name
             )
 
+        self.complete_sso_login(registered_user_id, request, client_redirect_url)
+
+    def complete_sso_login(
+        self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str
+    ):
+        """Having figured out a mxid for this user, complete the HTTP request
+
+        Args:
+            registered_user_id:
+            request:
+            client_redirect_url:
+        """
+
         login_token = self._macaroon_gen.generate_short_term_login_token(
             registered_user_id
         )
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 109052fa41..cc1b1b73c9 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -22,6 +22,7 @@ from six import iterkeys
 from six.moves import range
 
 from twisted.internet import defer
+from twisted.internet.defer import Deferred
 
 from synapse.api.constants import UserTypes
 from synapse.api.errors import Codes, StoreError, ThreepidValidationError
@@ -384,6 +385,26 @@ class RegistrationWorkerStore(SQLBaseStore):
 
         return self.runInteraction("get_users_by_id_case_insensitive", f)
 
+    async def get_user_by_external_id(
+        self, auth_provider: str, external_id: str
+    ) -> str:
+        """Look up a user by their external auth id
+
+        Args:
+            auth_provider: identifier for the remote auth provider
+            external_id: id on that system
+
+        Returns:
+            str|None: the mxid of the user, or None if they are not known
+        """
+        return await self._simple_select_one_onecol(
+            table="user_external_ids",
+            keyvalues={"auth_provider": auth_provider, "external_id": external_id},
+            retcol="user_id",
+            allow_none=True,
+            desc="get_user_by_external_id",
+        )
+
     @defer.inlineCallbacks
     def count_all_users(self):
         """Counts all users registered on the homeserver."""
@@ -999,6 +1020,26 @@ class RegistrationStore(
         self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
         txn.call_after(self.is_guest.invalidate, (user_id,))
 
+    def record_user_external_id(
+        self, auth_provider: str, external_id: str, user_id: str
+    ) -> Deferred:
+        """Record a mapping from an external user id to a mxid
+
+        Args:
+            auth_provider: identifier for the remote auth provider
+            external_id: id on that system
+            user_id: complete mxid that it is mapped to
+        """
+        return self._simple_insert(
+            table="user_external_ids",
+            values={
+                "auth_provider": auth_provider,
+                "external_id": external_id,
+                "user_id": user_id,
+            },
+            desc="record_user_external_id",
+        )
+
     def user_set_password_hash(self, user_id, password_hash):
         """
         NB. This does *not* evict any cache because the one use for this
diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql
new file mode 100644
index 0000000000..91390c4527
--- /dev/null
+++ b/synapse/storage/schema/delta/56/user_external_ids.sql
@@ -0,0 +1,24 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * a table which records mappings from external auth providers to mxids
+ */
+CREATE TABLE IF NOT EXISTS user_external_ids (
+    auth_provider TEXT NOT NULL,
+    external_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    UNIQUE (auth_provider, external_id)
+);