summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
Diffstat (limited to 'synapse')
-rw-r--r--synapse/api/auth.py8
-rw-r--r--synapse/config/_base.py5
-rw-r--r--synapse/config/api.py12
-rw-r--r--synapse/config/appservice.py10
-rw-r--r--synapse/config/captcha.py23
-rw-r--r--synapse/config/database.py3
-rw-r--r--synapse/config/groups.py4
-rw-r--r--synapse/config/key.py37
-rw-r--r--synapse/config/metrics.py4
-rw-r--r--synapse/config/password.py15
-rw-r--r--synapse/config/ratelimiting.py37
-rw-r--r--synapse/config/registration.py25
-rw-r--r--synapse/config/repository.py77
-rw-r--r--synapse/config/saml2_config.py2
-rw-r--r--synapse/config/server.py6
-rw-r--r--synapse/config/voip.py8
-rw-r--r--synapse/handlers/auth.py28
-rw-r--r--synapse/handlers/message.py13
-rw-r--r--synapse/handlers/register.py5
-rw-r--r--synapse/handlers/user_directory.py173
-rw-r--r--synapse/storage/background_updates.py8
-rw-r--r--synapse/storage/schema/delta/53/user_dir_populate.sql30
-rw-r--r--synapse/storage/user_directory.py370
23 files changed, 536 insertions, 367 deletions
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 5992d30623..ee646a97e8 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -788,9 +788,11 @@ class Auth(object):
 
         # Never fail an auth check for the server notices users or support user
         # This can be a problem where event creation is prohibited due to blocking
-        is_support = yield self.store.is_support_user(user_id)
-        if user_id == self.hs.config.server_notices_mxid or is_support:
-            return
+        if user_id is not None:
+            if user_id == self.hs.config.server_notices_mxid:
+                return
+            if (yield self.store.is_support_user(user_id)):
+                return
 
         if self.hs.config.hs_disabled:
             raise ResourceLimitError(
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 5613f38e4d..a219a83550 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -405,7 +405,10 @@ class Config(object):
             self.invoke_all("generate_files", config)
             return
 
-        self.invoke_all("read_config", config)
+        self.parse_config_dict(config)
+
+    def parse_config_dict(self, config_dict):
+        self.invoke_all("read_config", config_dict)
 
 
 def find_config_files(search_paths):
diff --git a/synapse/config/api.py b/synapse/config/api.py
index e8a753f002..5eb4f86fa2 100644
--- a/synapse/config/api.py
+++ b/synapse/config/api.py
@@ -34,10 +34,10 @@ class ApiConfig(Config):
 
         # A list of event types that will be included in the room_invite_state
         #
-        room_invite_state_types:
-            - "{JoinRules}"
-            - "{CanonicalAlias}"
-            - "{RoomAvatar}"
-            - "{RoomEncryption}"
-            - "{Name}"
+        #room_invite_state_types:
+        #  - "{JoinRules}"
+        #  - "{CanonicalAlias}"
+        #  - "{RoomAvatar}"
+        #  - "{RoomEncryption}"
+        #  - "{Name}"
         """.format(**vars(EventTypes))
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index c260d59464..9e64c76544 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -37,14 +37,16 @@ class AppServiceConfig(Config):
 
     def default_config(cls, **kwargs):
         return """\
-        # A list of application service config file to use
+        # A list of application service config files to use
         #
-        app_service_config_files: []
+        #app_service_config_files:
+        #  - app_service_1.yaml
+        #  - app_service_2.yaml
 
-        # Whether or not to track application service IP addresses. Implicitly
+        # Uncomment to enable tracking of application service IP addresses. Implicitly
         # enables MAU tracking for application service users.
         #
-        track_appservice_user_ips: False
+        #track_appservice_user_ips: True
         """
 
 
diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py
index d25196be08..f7eebf26d2 100644
--- a/synapse/config/captcha.py
+++ b/synapse/config/captcha.py
@@ -18,11 +18,16 @@ from ._base import Config
 class CaptchaConfig(Config):
 
     def read_config(self, config):
-        self.recaptcha_private_key = config["recaptcha_private_key"]
-        self.recaptcha_public_key = config["recaptcha_public_key"]
-        self.enable_registration_captcha = config["enable_registration_captcha"]
+        self.recaptcha_private_key = config.get("recaptcha_private_key")
+        self.recaptcha_public_key = config.get("recaptcha_public_key")
+        self.enable_registration_captcha = config.get(
+            "enable_registration_captcha", False
+        )
         self.captcha_bypass_secret = config.get("captcha_bypass_secret")
-        self.recaptcha_siteverify_api = config["recaptcha_siteverify_api"]
+        self.recaptcha_siteverify_api = config.get(
+            "recaptcha_siteverify_api",
+            "https://www.recaptcha.net/recaptcha/api/siteverify",
+        )
 
     def default_config(self, **kwargs):
         return """\
@@ -31,21 +36,23 @@ class CaptchaConfig(Config):
 
         # This Home Server's ReCAPTCHA public key.
         #
-        recaptcha_public_key: "YOUR_PUBLIC_KEY"
+        #recaptcha_public_key: "YOUR_PUBLIC_KEY"
 
         # This Home Server's ReCAPTCHA private key.
         #
-        recaptcha_private_key: "YOUR_PRIVATE_KEY"
+        #recaptcha_private_key: "YOUR_PRIVATE_KEY"
 
         # Enables ReCaptcha checks when registering, preventing signup
         # unless a captcha is answered. Requires a valid ReCaptcha
         # public/private key.
         #
-        enable_registration_captcha: False
+        #enable_registration_captcha: false
 
         # A secret key used to bypass the captcha test entirely.
+        #
         #captcha_bypass_secret: "YOUR_SECRET_HERE"
 
         # The API endpoint to use for verifying m.login.recaptcha responses.
-        recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify"
+        #
+        #recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify"
         """
diff --git a/synapse/config/database.py b/synapse/config/database.py
index 63e9cb63f8..3c27ed6b4a 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -60,7 +60,8 @@ class DatabaseConfig(Config):
             database: "%(database_path)s"
 
         # Number of events to cache in memory.
-        event_cache_size: "10K"
+        #
+        #event_cache_size: 10K
         """ % locals()
 
     def read_arguments(self, args):
diff --git a/synapse/config/groups.py b/synapse/config/groups.py
index 46933a904c..e4be172a79 100644
--- a/synapse/config/groups.py
+++ b/synapse/config/groups.py
@@ -23,9 +23,9 @@ class GroupsConfig(Config):
 
     def default_config(self, **kwargs):
         return """\
-        # Whether to allow non server admins to create groups on this server
+        # Uncomment to allow non-server-admin users to create groups on this server
         #
-        enable_group_creation: false
+        #enable_group_creation: true
 
         # If enabled, non server admins can only create groups with local parts
         # starting with this prefix
diff --git a/synapse/config/key.py b/synapse/config/key.py
index 35f05fa974..933928885a 100644
--- a/synapse/config/key.py
+++ b/synapse/config/key.py
@@ -38,15 +38,26 @@ logger = logging.getLogger(__name__)
 class KeyConfig(Config):
 
     def read_config(self, config):
-        self.signing_key = self.read_signing_key(config["signing_key_path"])
+        # the signing key can be specified inline or in a separate file
+        if "signing_key" in config:
+            self.signing_key = read_signing_keys([config["signing_key"]])
+        else:
+            self.signing_key = self.read_signing_key(config["signing_key_path"])
+
         self.old_signing_keys = self.read_old_signing_keys(
             config.get("old_signing_keys", {})
         )
         self.key_refresh_interval = self.parse_duration(
-            config["key_refresh_interval"]
+            config.get("key_refresh_interval", "1d"),
         )
         self.perspectives = self.read_perspectives(
-            config["perspectives"]
+            config.get("perspectives", {}).get("servers", {
+                "matrix.org": {"verify_keys": {
+                    "ed25519:auto": {
+                        "key": "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw",
+                    }
+                }}
+            })
         )
 
         self.macaroon_secret_key = config.get(
@@ -88,7 +99,7 @@ class KeyConfig(Config):
 
         # Used to enable access token expiration.
         #
-        expire_access_token: False
+        #expire_access_token: False
 
         # a secret which is used to calculate HMACs for form values, to stop
         # falsification of values. Must be specified for the User Consent
@@ -117,21 +128,21 @@ class KeyConfig(Config):
         # Determines how quickly servers will query to check which keys
         # are still valid.
         #
-        key_refresh_interval: "1d" # 1 Day.
+        #key_refresh_interval: 1d
 
         # The trusted servers to download signing keys from.
         #
-        perspectives:
-          servers:
-            "matrix.org":
-              verify_keys:
-                "ed25519:auto":
-                  key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw"
+        #perspectives:
+        #  servers:
+        #    "matrix.org":
+        #      verify_keys:
+        #        "ed25519:auto":
+        #          key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw"
         """ % locals()
 
-    def read_perspectives(self, perspectives_config):
+    def read_perspectives(self, perspectives_servers):
         servers = {}
-        for server_name, server_config in perspectives_config["servers"].items():
+        for server_name, server_config in perspectives_servers.items():
             for key_id, key_data in server_config["verify_keys"].items():
                 if is_signing_algorithm_supported(key_id):
                     key_base64 = key_data["key"]
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index ed0498c634..2de51979d8 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -24,7 +24,7 @@ MISSING_SENTRY = (
 
 class MetricsConfig(Config):
     def read_config(self, config):
-        self.enable_metrics = config["enable_metrics"]
+        self.enable_metrics = config.get("enable_metrics", False)
         self.report_stats = config.get("report_stats", None)
         self.metrics_port = config.get("metrics_port")
         self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1")
@@ -48,7 +48,7 @@ class MetricsConfig(Config):
 
         # Enable collection and rendering of performance metrics
         #
-        enable_metrics: False
+        #enable_metrics: False
 
         # Enable sentry integration
         # NOTE: While attempts are made to ensure that the logs don't contain
diff --git a/synapse/config/password.py b/synapse/config/password.py
index 2a52b9db54..eea59e772b 100644
--- a/synapse/config/password.py
+++ b/synapse/config/password.py
@@ -22,16 +22,21 @@ class PasswordConfig(Config):
 
     def read_config(self, config):
         password_config = config.get("password_config", {})
+        if password_config is None:
+            password_config = {}
+
         self.password_enabled = password_config.get("enabled", True)
         self.password_pepper = password_config.get("pepper", "")
 
     def default_config(self, config_dir_path, server_name, **kwargs):
-        return """
-        # Enable password for login.
-        #
+        return """\
         password_config:
-           enabled: true
+           # Uncomment to disable password login
+           #
+           #enabled: false
+
            # Uncomment and change to a secret random string for extra security.
            # DO NOT CHANGE THIS AFTER INITIAL SETUP!
-           #pepper: ""
+           #
+           #pepper: "EVEN_MORE_SECRET"
         """
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 649f018356..898a19dd8c 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -24,20 +24,23 @@ class RateLimitConfig(object):
 class RatelimitConfig(Config):
 
     def read_config(self, config):
-        self.rc_messages_per_second = config["rc_messages_per_second"]
-        self.rc_message_burst_count = config["rc_message_burst_count"]
+        self.rc_messages_per_second = config.get("rc_messages_per_second", 0.2)
+        self.rc_message_burst_count = config.get("rc_message_burst_count", 10.0)
 
         self.rc_registration = RateLimitConfig(config.get("rc_registration", {}))
 
         rc_login_config = config.get("rc_login", {})
         self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {}))
         self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {}))
+        self.rc_login_failed_attempts = RateLimitConfig(
+            rc_login_config.get("failed_attempts", {}),
+        )
 
-        self.federation_rc_window_size = config["federation_rc_window_size"]
-        self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"]
-        self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"]
-        self.federation_rc_reject_limit = config["federation_rc_reject_limit"]
-        self.federation_rc_concurrent = config["federation_rc_concurrent"]
+        self.federation_rc_window_size = config.get("federation_rc_window_size", 1000)
+        self.federation_rc_sleep_limit = config.get("federation_rc_sleep_limit", 10)
+        self.federation_rc_sleep_delay = config.get("federation_rc_sleep_delay", 500)
+        self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50)
+        self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3)
 
     def default_config(self, **kwargs):
         return """\
@@ -45,11 +48,11 @@ class RatelimitConfig(Config):
 
         # Number of messages a client can send per second
         #
-        rc_messages_per_second: 0.2
+        #rc_messages_per_second: 0.2
 
         # Number of message a client can send before being throttled
         #
-        rc_message_burst_count: 10.0
+        #rc_message_burst_count: 10.0
 
         # Ratelimiting settings for registration and login.
         #
@@ -64,6 +67,9 @@ class RatelimitConfig(Config):
         #     address.
         #   - one for login that ratelimits login requests based on the account the
         #     client is attempting to log into.
+        #   - one for login that ratelimits login requests based on the account the
+        #     client is attempting to log into, based on the amount of failed login
+        #     attempts for this account.
         #
         # The defaults are as shown below.
         #
@@ -78,28 +84,31 @@ class RatelimitConfig(Config):
         #  account:
         #    per_second: 0.17
         #    burst_count: 3
+        #  failed_attempts:
+        #    per_second: 0.17
+        #    burst_count: 3
 
         # The federation window size in milliseconds
         #
-        federation_rc_window_size: 1000
+        #federation_rc_window_size: 1000
 
         # The number of federation requests from a single server in a window
         # before the server will delay processing the request.
         #
-        federation_rc_sleep_limit: 10
+        #federation_rc_sleep_limit: 10
 
         # The duration in milliseconds to delay processing events from
         # remote servers by if they go over the sleep limit.
         #
-        federation_rc_sleep_delay: 500
+        #federation_rc_sleep_delay: 500
 
         # The maximum number of concurrent federation requests allowed
         # from a single server
         #
-        federation_rc_reject_limit: 50
+        #federation_rc_reject_limit: 50
 
         # The number of federation requests to concurrently process from a
         # single server
         #
-        federation_rc_concurrent: 3
+        #federation_rc_concurrent: 3
         """
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index a123f25a68..f6b2b9ceee 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -24,7 +24,7 @@ class RegistrationConfig(Config):
 
     def read_config(self, config):
         self.enable_registration = bool(
-            strtobool(str(config["enable_registration"]))
+            strtobool(str(config.get("enable_registration", False)))
         )
         if "disable_registration" in config:
             self.enable_registration = not bool(
@@ -36,7 +36,10 @@ class RegistrationConfig(Config):
         self.registration_shared_secret = config.get("registration_shared_secret")
 
         self.bcrypt_rounds = config.get("bcrypt_rounds", 12)
-        self.trusted_third_party_id_servers = config["trusted_third_party_id_servers"]
+        self.trusted_third_party_id_servers = config.get(
+            "trusted_third_party_id_servers",
+            ["matrix.org", "vector.im"],
+        )
         self.default_identity_server = config.get("default_identity_server")
         self.allow_guest_access = config.get("allow_guest_access", False)
 
@@ -64,11 +67,13 @@ class RegistrationConfig(Config):
 
         return """\
         ## Registration ##
+        #
         # Registration can be rate-limited using the parameters in the "Ratelimiting"
         # section of this file.
 
         # Enable registration for new users.
-        enable_registration: False
+        #
+        #enable_registration: false
 
         # The user must provide all of the below types of 3PID when registering.
         #
@@ -79,7 +84,7 @@ class RegistrationConfig(Config):
         # Explicitly disable asking for MSISDNs from the registration
         # flow (overrides registrations_require_3pid if MSISDNs are set as required)
         #
-        #disable_msisdn_registration: True
+        #disable_msisdn_registration: true
 
         # Mandate that users are only allowed to associate certain formats of
         # 3PIDs with accounts on this server.
@@ -103,13 +108,13 @@ class RegistrationConfig(Config):
         # N.B. that increasing this will exponentially increase the time required
         # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins.
         #
-        bcrypt_rounds: 12
+        #bcrypt_rounds: 12
 
         # Allows users to register as guests without a password/email/etc, and
         # participate in rooms hosted on this server which have been made
         # accessible to anonymous users.
         #
-        allow_guest_access: False
+        #allow_guest_access: false
 
         # The identity server which we suggest that clients should use when users log
         # in on this server.
@@ -125,9 +130,9 @@ class RegistrationConfig(Config):
         # Also defines the ID server which will be called when an account is
         # deactivated (one will be picked arbitrarily).
         #
-        trusted_third_party_id_servers:
-          - matrix.org
-          - vector.im
+        #trusted_third_party_id_servers:
+        #  - matrix.org
+        #  - vector.im
 
         # Users who register on this homeserver will automatically be joined
         # to these rooms
@@ -141,7 +146,7 @@ class RegistrationConfig(Config):
         # Setting to false means that if the rooms are not manually created,
         # users cannot be auto-joined since they do not exist.
         #
-        autocreate_auto_join_rooms: true
+        #autocreate_auto_join_rooms: true
         """ % locals()
 
     def add_arguments(self, parser):
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 97db2a5b7a..3f34ad9b2a 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -19,6 +19,36 @@ from synapse.util.module_loader import load_module
 
 from ._base import Config, ConfigError
 
+DEFAULT_THUMBNAIL_SIZES = [
+    {
+        "width": 32,
+        "height": 32,
+        "method": "crop",
+    }, {
+        "width": 96,
+        "height": 96,
+        "method": "crop",
+    }, {
+        "width": 320,
+        "height": 240,
+        "method": "scale",
+    }, {
+        "width": 640,
+        "height": 480,
+        "method": "scale",
+    }, {
+        "width": 800,
+        "height": 600,
+        "method": "scale"
+    },
+]
+
+THUMBNAIL_SIZE_YAML = """\
+        #  - width: %(width)i
+        #    height: %(height)i
+        #    method: %(method)s
+"""
+
 MISSING_NETADDR = (
     "Missing netaddr library. This is required for URL preview API."
 )
@@ -77,9 +107,9 @@ def parse_thumbnail_requirements(thumbnail_sizes):
 
 class ContentRepositoryConfig(Config):
     def read_config(self, config):
-        self.max_upload_size = self.parse_size(config["max_upload_size"])
-        self.max_image_pixels = self.parse_size(config["max_image_pixels"])
-        self.max_spider_size = self.parse_size(config["max_spider_size"])
+        self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M"))
+        self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
+        self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))
 
         self.media_store_path = self.ensure_directory(config["media_store_path"])
 
@@ -139,9 +169,9 @@ class ContentRepositoryConfig(Config):
             )
 
         self.uploads_path = self.ensure_directory(config["uploads_path"])
-        self.dynamic_thumbnails = config["dynamic_thumbnails"]
+        self.dynamic_thumbnails = config.get("dynamic_thumbnails", False)
         self.thumbnail_requirements = parse_thumbnail_requirements(
-            config["thumbnail_sizes"]
+            config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES),
         )
         self.url_preview_enabled = config.get("url_preview_enabled", False)
         if self.url_preview_enabled:
@@ -178,6 +208,13 @@ class ContentRepositoryConfig(Config):
     def default_config(self, data_dir_path, **kwargs):
         media_store = os.path.join(data_dir_path, "media_store")
         uploads_path = os.path.join(data_dir_path, "uploads")
+
+        formatted_thumbnail_sizes = "".join(
+            THUMBNAIL_SIZE_YAML % s for s in DEFAULT_THUMBNAIL_SIZES
+        )
+        # strip final NL
+        formatted_thumbnail_sizes = formatted_thumbnail_sizes[:-1]
+
         return r"""
         # Directory where uploaded images and attachments are stored.
         #
@@ -204,11 +241,11 @@ class ContentRepositoryConfig(Config):
 
         # The largest allowed upload size in bytes
         #
-        max_upload_size: "10M"
+        #max_upload_size: 10M
 
         # Maximum number of pixels that will be thumbnailed
         #
-        max_image_pixels: "32M"
+        #max_image_pixels: 32M
 
         # Whether to generate new thumbnails on the fly to precisely match
         # the resolution requested by the client. If true then whenever
@@ -216,32 +253,18 @@ class ContentRepositoryConfig(Config):
         # generate a new thumbnail. If false the server will pick a thumbnail
         # from a precalculated list.
         #
-        dynamic_thumbnails: false
+        #dynamic_thumbnails: false
 
         # List of thumbnails to precalculate when an image is uploaded.
         #
-        thumbnail_sizes:
-        - width: 32
-          height: 32
-          method: crop
-        - width: 96
-          height: 96
-          method: crop
-        - width: 320
-          height: 240
-          method: scale
-        - width: 640
-          height: 480
-          method: scale
-        - width: 800
-          height: 600
-          method: scale
+        #thumbnail_sizes:
+%(formatted_thumbnail_sizes)s
 
         # Is the preview URL API enabled?  If enabled, you *must* specify
         # an explicit url_preview_ip_range_blacklist of IPs that the spider is
         # denied from accessing.
         #
-        url_preview_enabled: False
+        #url_preview_enabled: false
 
         # List of IP address CIDR ranges that the URL preview spider is denied
         # from accessing.  There are no defaults: you must explicitly
@@ -306,6 +329,6 @@ class ContentRepositoryConfig(Config):
         #  - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
 
         # The largest allowed URL preview spidering size in bytes
-        max_spider_size: "10M"
-
+        #
+        #max_spider_size: 10M
         """ % locals()
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index aff0a1f00c..39b9eb29c2 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -64,7 +64,7 @@ class SAML2Config(Config):
         }
 
     def default_config(self, config_dir_path, server_name, **kwargs):
-        return """
+        return """\
         # Enable SAML2 for registration and login. Uses pysaml2.
         #
         # `sp_config` is the configuration for the pysaml2 Service Provider.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 35a322fee0..499eb30bea 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -45,7 +45,7 @@ class ServerConfig(Config):
 
         self.pid_file = self.abspath(config.get("pid_file"))
         self.web_client_location = config.get("web_client_location", None)
-        self.soft_file_limit = config["soft_file_limit"]
+        self.soft_file_limit = config.get("soft_file_limit", 0)
         self.daemonize = config.get("daemonize")
         self.print_pidfile = config.get("print_pidfile")
         self.user_agent_suffix = config.get("user_agent_suffix")
@@ -307,11 +307,11 @@ class ServerConfig(Config):
         # Zero is used to indicate synapse should set the soft limit to the
         # hard limit.
         #
-        soft_file_limit: 0
+        #soft_file_limit: 0
 
         # Set to false to disable presence tracking on this homeserver.
         #
-        use_presence: true
+        #use_presence: false
 
         # The GC threshold parameters to pass to `gc.set_threshold`, if defined
         #
diff --git a/synapse/config/voip.py b/synapse/config/voip.py
index 257f7c86e7..2a1f005a37 100644
--- a/synapse/config/voip.py
+++ b/synapse/config/voip.py
@@ -22,7 +22,9 @@ class VoipConfig(Config):
         self.turn_shared_secret = config.get("turn_shared_secret")
         self.turn_username = config.get("turn_username")
         self.turn_password = config.get("turn_password")
-        self.turn_user_lifetime = self.parse_duration(config["turn_user_lifetime"])
+        self.turn_user_lifetime = self.parse_duration(
+            config.get("turn_user_lifetime", "1h"),
+        )
         self.turn_allow_guests = config.get("turn_allow_guests", True)
 
     def default_config(self, **kwargs):
@@ -45,7 +47,7 @@ class VoipConfig(Config):
 
         # How long generated TURN credentials last
         #
-        turn_user_lifetime: "1h"
+        #turn_user_lifetime: 1h
 
         # Whether guests should be allowed to use the TURN server.
         # This defaults to True, otherwise VoIP will be unreliable for guests.
@@ -53,5 +55,5 @@ class VoipConfig(Config):
         # connect to arbitrary endpoints without having first signed up for a
         # valid account (e.g. by passing a CAPTCHA).
         #
-        turn_allow_guests: True
+        #turn_allow_guests: True
         """
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 74f3790f25..caad9ae2dd 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -101,6 +101,7 @@ class AuthHandler(BaseHandler):
         self._supported_login_types = login_types
 
         self._account_ratelimiter = Ratelimiter()
+        self._failed_attempts_ratelimiter = Ratelimiter()
 
         self._clock = self.hs.get_clock()
 
@@ -729,9 +730,16 @@ class AuthHandler(BaseHandler):
         if not known_login_type:
             raise SynapseError(400, "Unknown login type %s" % login_type)
 
-        # unknown username or invalid password. We raise a 403 here, but note
-        # that if we're doing user-interactive login, it turns all LoginErrors
-        # into a 401 anyway.
+        # unknown username or invalid password.
+        self._failed_attempts_ratelimiter.ratelimit(
+            qualified_user_id.lower(), time_now_s=self._clock.time(),
+            rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
+            burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
+            update=True,
+        )
+
+        # We raise a 403 here, but note that if we're doing user-interactive
+        # login, it turns all LoginErrors into a 401 anyway.
         raise LoginError(
             403, "Invalid password",
             errcode=Codes.FORBIDDEN
@@ -956,13 +964,23 @@ class AuthHandler(BaseHandler):
     def ratelimit_login_per_account(self, user_id):
         """Checks whether the process must be stopped because of ratelimiting.
 
+        Checks against two ratelimiters: the generic one for login attempts per
+        account and the one specific to failed attempts.
+
         Args:
             user_id (unicode): complete @user:id
 
         Raises:
-            LimitExceededError if the ratelimiter's login requests count for this
-                user is too high too proceed.
+            LimitExceededError if one of the ratelimiters' login requests count
+                for this user is too high too proceed.
         """
+        self._failed_attempts_ratelimiter.ratelimit(
+            user_id.lower(), time_now_s=self._clock.time(),
+            rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
+            burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
+            update=False,
+        )
+
         self._account_ratelimiter.ratelimit(
             user_id.lower(), time_now_s=self._clock.time(),
             rate_hz=self.hs.config.rc_login_account.per_second,
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index c762b58902..55787563c0 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -243,7 +243,14 @@ class EventCreationHandler(object):
 
         self.spam_checker = hs.get_spam_checker()
 
-        if self.config.block_events_without_consent_error is not None:
+        self._block_events_without_consent_error = (
+            self.config.block_events_without_consent_error
+        )
+
+        # we need to construct a ConsentURIBuilder here, as it checks that the necessary
+        # config options, but *only* if we have a configuration for which we are
+        # going to need it.
+        if self._block_events_without_consent_error:
             self._consent_uri_builder = ConsentURIBuilder(self.config)
 
     @defer.inlineCallbacks
@@ -378,7 +385,7 @@ class EventCreationHandler(object):
         Raises:
             ConsentNotGivenError: if the user has not given consent yet
         """
-        if self.config.block_events_without_consent_error is None:
+        if self._block_events_without_consent_error is None:
             return
 
         # exempt AS users from needing consent
@@ -405,7 +412,7 @@ class EventCreationHandler(object):
         consent_uri = self._consent_uri_builder.build_user_consent_uri(
             requester.user.localpart,
         )
-        msg = self.config.block_events_without_consent_error % {
+        msg = self._block_events_without_consent_error % {
             'consent_uri': consent_uri,
         }
         raise ConsentNotGivenError(
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 0ec16b1d2e..68f73d3793 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -23,6 +23,7 @@ from synapse.api.constants import LoginType
 from synapse.api.errors import (
     AuthError,
     Codes,
+    ConsentNotGivenError,
     InvalidCaptchaError,
     LimitExceededError,
     RegistrationError,
@@ -311,6 +312,10 @@ class RegistrationHandler(BaseHandler):
                         )
                 else:
                     yield self._join_user_to_room(fake_requester, r)
+            except ConsentNotGivenError as e:
+                # Technically not necessary to pull out this error though
+                # moving away from bare excepts is a good thing to do.
+                logger.error("Failed to join new user to %r: %r", r, e)
             except Exception as e:
                 logger.error("Failed to join new user to %r: %r", r, e)
 
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index d92f8c529c..7dc0e236e7 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -38,18 +38,8 @@ class UserDirectoryHandler(object):
     world_readable or publically joinable room. We keep a database table up to date
     by streaming changes of the current state and recalculating whether users should
     be in the directory or not when necessary.
-
-    For each user in the directory we also store a room_id which is public and that the
-    user is joined to. This allows us to ignore history_visibility and join_rules changes
-    for that user in all other public rooms, as we know they'll still be in at least
-    one public room.
     """
 
-    INITIAL_ROOM_SLEEP_MS = 50
-    INITIAL_ROOM_SLEEP_COUNT = 100
-    INITIAL_ROOM_BATCH_SIZE = 100
-    INITIAL_USER_SLEEP_MS = 10
-
     def __init__(self, hs):
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
@@ -59,17 +49,6 @@ class UserDirectoryHandler(object):
         self.is_mine_id = hs.is_mine_id
         self.update_user_directory = hs.config.update_user_directory
         self.search_all_users = hs.config.user_directory_search_all_users
-
-        # If we're a worker, don't sleep when doing the initial room work, as it
-        # won't monopolise the master's CPU.
-        if hs.config.worker_app:
-            self.INITIAL_ROOM_SLEEP_MS = 0
-            self.INITIAL_USER_SLEEP_MS = 0
-
-        # When start up for the first time we need to populate the user_directory.
-        # This is a set of user_id's we've inserted already
-        self.initially_handled_users = set()
-
         # The current position in the current_state_delta stream
         self.pos = None
 
@@ -132,7 +111,7 @@ class UserDirectoryHandler(object):
         # Support users are for diagnostics and should not appear in the user directory.
         if not is_support:
             yield self.store.update_profile_in_user_dir(
-                user_id, profile.display_name, profile.avatar_url, None
+                user_id, profile.display_name, profile.avatar_url
             )
 
     @defer.inlineCallbacks
@@ -149,10 +128,9 @@ class UserDirectoryHandler(object):
         if self.pos is None:
             self.pos = yield self.store.get_user_directory_stream_pos()
 
-        # If still None then we need to do the initial fill of directory
+        # If still None then the initial background update hasn't happened yet
         if self.pos is None:
-            yield self._do_initial_spam()
-            self.pos = yield self.store.get_user_directory_stream_pos()
+            defer.returnValue(None)
 
         # Loop round handling deltas until we're up to date
         while True:
@@ -174,133 +152,6 @@ class UserDirectoryHandler(object):
                 yield self.store.update_user_directory_stream_pos(self.pos)
 
     @defer.inlineCallbacks
-    def _do_initial_spam(self):
-        """Populates the user_directory from the current state of the DB, used
-        when synapse first starts with user_directory support
-        """
-        new_pos = yield self.store.get_max_stream_id_in_current_state_deltas()
-
-        # Delete any existing entries just in case there are any
-        yield self.store.delete_all_from_user_dir()
-
-        # We process by going through each existing room at a time.
-        room_ids = yield self.store.get_all_rooms()
-
-        logger.info("Doing initial update of user directory. %d rooms", len(room_ids))
-        num_processed_rooms = 0
-
-        for room_id in room_ids:
-            logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids))
-            yield self._handle_initial_room(room_id)
-            num_processed_rooms += 1
-            yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
-
-        logger.info("Processed all rooms.")
-
-        if self.search_all_users:
-            num_processed_users = 0
-            user_ids = yield self.store.get_all_local_users()
-            logger.info(
-                "Doing initial update of user directory. %d users", len(user_ids)
-            )
-            for user_id in user_ids:
-                # We add profiles for all users even if they don't match the
-                # include pattern, just in case we want to change it in future
-                logger.info(
-                    "Handling user %d/%d", num_processed_users + 1, len(user_ids)
-                )
-                yield self._handle_local_user(user_id)
-                num_processed_users += 1
-                yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0)
-
-            logger.info("Processed all users")
-
-        self.initially_handled_users = None
-
-        yield self.store.update_user_directory_stream_pos(new_pos)
-
-    @defer.inlineCallbacks
-    def _handle_initial_room(self, room_id):
-        """
-        Called when we initially fill out user_directory one room at a time
-        """
-        is_in_room = yield self.store.is_host_joined(room_id, self.server_name)
-        if not is_in_room:
-            return
-
-        is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
-            room_id
-        )
-
-        users_with_profile = yield self.state.get_current_user_in_room(room_id)
-        user_ids = set(users_with_profile)
-        unhandled_users = user_ids - self.initially_handled_users
-
-        yield self.store.add_profiles_to_user_dir(
-            {user_id: users_with_profile[user_id] for user_id in unhandled_users}
-        )
-
-        self.initially_handled_users |= unhandled_users
-
-        # We now go and figure out the new users who share rooms with user entries
-        # We sleep aggressively here as otherwise it can starve resources.
-        # We also batch up inserts/updates, but try to avoid too many at once.
-        to_insert = set()
-        count = 0
-
-        if is_public:
-            for user_id in user_ids:
-                if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
-                    yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
-
-                if self.store.get_if_app_services_interested_in_user(user_id):
-                    count += 1
-                    continue
-
-                to_insert.add(user_id)
-                if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE:
-                    yield self.store.add_users_in_public_rooms(room_id, to_insert)
-                    to_insert.clear()
-
-            if to_insert:
-                yield self.store.add_users_in_public_rooms(room_id, to_insert)
-                to_insert.clear()
-        else:
-
-            for user_id in user_ids:
-                if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
-                    yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
-
-                if not self.is_mine_id(user_id):
-                    count += 1
-                    continue
-
-                if self.store.get_if_app_services_interested_in_user(user_id):
-                    count += 1
-                    continue
-
-                for other_user_id in user_ids:
-                    if user_id == other_user_id:
-                        continue
-
-                    if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
-                        yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
-                    count += 1
-
-                    user_set = (user_id, other_user_id)
-                    to_insert.add(user_set)
-
-                    if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE:
-                        yield self.store.add_users_who_share_private_room(
-                            room_id, not is_public, to_insert
-                        )
-                        to_insert.clear()
-
-            if to_insert:
-                yield self.store.add_users_who_share_private_room(room_id, to_insert)
-                to_insert.clear()
-
-    @defer.inlineCallbacks
     def _handle_deltas(self, deltas):
         """Called with the state deltas to process
         """
@@ -449,7 +300,9 @@ class UserDirectoryHandler(object):
 
         row = yield self.store.get_user_in_directory(user_id)
         if not row:
-            yield self.store.add_profiles_to_user_dir({user_id: profile})
+            yield self.store.update_profile_in_user_dir(
+                user_id, profile.display_name, profile.avatar_url
+            )
 
     @defer.inlineCallbacks
     def _handle_new_user(self, room_id, user_id, profile):
@@ -461,9 +314,9 @@ class UserDirectoryHandler(object):
         """
         logger.debug("Adding new user to dir, %r", user_id)
 
-        row = yield self.store.get_user_in_directory(user_id)
-        if not row:
-            yield self.store.add_profiles_to_user_dir({user_id: profile})
+        yield self.store.update_profile_in_user_dir(
+            user_id, profile.display_name, profile.avatar_url
+        )
 
         is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
             room_id
@@ -479,7 +332,9 @@ class UserDirectoryHandler(object):
             # First, if they're our user then we need to update for every user
             if self.is_mine_id(user_id):
 
-                is_appservice = self.store.get_if_app_services_interested_in_user(user_id)
+                is_appservice = self.store.get_if_app_services_interested_in_user(
+                    user_id
+                )
 
                 # We don't care about appservice users.
                 if not is_appservice:
@@ -546,9 +401,7 @@ class UserDirectoryHandler(object):
         new_avatar = event.content.get("avatar_url")
 
         if prev_name != new_name or prev_avatar != new_avatar:
-            yield self.store.update_profile_in_user_dir(
-                user_id, new_name, new_avatar, room_id
-            )
+            yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar)
 
     @defer.inlineCallbacks
     def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 60cdc884e6..a2f8c23a65 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object):
         Returns:
             A duration in ms as a float
         """
-        if self.total_item_count == 0:
+        if self.avg_duration_ms == 0:
+            return 0
+        elif self.total_item_count == 0:
             return None
         else:
             # Use the exponential moving average so that we can adapt to
@@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object):
         Returns:
             A duration in ms as a float
         """
-        if self.total_item_count == 0:
+        if self.total_duration_ms == 0:
+            return 0
+        elif self.total_item_count == 0:
             return None
         else:
             return float(self.total_item_count) / float(self.total_duration_ms)
diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/schema/delta/53/user_dir_populate.sql
new file mode 100644
index 0000000000..ffcc896b58
--- /dev/null
+++ b/synapse/storage/schema/delta/53/user_dir_populate.sql
@@ -0,0 +1,30 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Set up staging tables
+INSERT INTO background_updates (update_name, progress_json) VALUES
+    ('populate_user_directory_createtables', '{}');
+
+-- Run through each room and update the user directory according to who is in it
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables');
+
+-- Insert all users, if search_all_users is on
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users');
diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
index 1c00b956e5..4ee653210f 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/user_directory.py
@@ -16,12 +16,10 @@
 import logging
 import re
 
-from six import iteritems
-
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, JoinRules
-from synapse.storage._base import SQLBaseStore
+from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.storage.state import StateFilter
 from synapse.types import get_domain_from_id, get_localpart_from_id
@@ -30,7 +28,276 @@ from synapse.util.caches.descriptors import cached
 logger = logging.getLogger(__name__)
 
 
-class UserDirectoryStore(SQLBaseStore):
+TEMP_TABLE = "_temp_populate_user_directory"
+
+
+class UserDirectoryStore(BackgroundUpdateStore):
+    def __init__(self, db_conn, hs):
+        super(UserDirectoryStore, self).__init__(db_conn, hs)
+
+        self.server_name = hs.hostname
+
+        self.register_background_update_handler(
+            "populate_user_directory_createtables",
+            self._populate_user_directory_createtables,
+        )
+        self.register_background_update_handler(
+            "populate_user_directory_process_rooms",
+            self._populate_user_directory_process_rooms,
+        )
+        self.register_background_update_handler(
+            "populate_user_directory_process_users",
+            self._populate_user_directory_process_users,
+        )
+        self.register_background_update_handler(
+            "populate_user_directory_cleanup", self._populate_user_directory_cleanup
+        )
+
+    @defer.inlineCallbacks
+    def _populate_user_directory_createtables(self, progress, batch_size):
+
+        # Get all the rooms that we want to process.
+        def _make_staging_area(txn):
+            sql = (
+                "CREATE TABLE IF NOT EXISTS "
+                + TEMP_TABLE
+                + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
+            )
+            txn.execute(sql)
+
+            sql = (
+                "CREATE TABLE IF NOT EXISTS "
+                + TEMP_TABLE
+                + "_position(position TEXT NOT NULL)"
+            )
+            txn.execute(sql)
+
+            # Get rooms we want to process from the database
+            sql = """
+                SELECT room_id, count(*) FROM current_state_events
+                GROUP BY room_id
+            """
+            txn.execute(sql)
+            rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
+            self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
+            del rooms
+
+            # If search all users is on, get all the users we want to add.
+            if self.hs.config.user_directory_search_all_users:
+                sql = (
+                    "CREATE TABLE IF NOT EXISTS "
+                    + TEMP_TABLE
+                    + "_users(user_id TEXT NOT NULL)"
+                )
+                txn.execute(sql)
+
+                txn.execute("SELECT name FROM users")
+                users = [{"user_id": x[0]} for x in txn.fetchall()]
+
+                self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users)
+
+        new_pos = yield self.get_max_stream_id_in_current_state_deltas()
+        yield self.runInteraction(
+            "populate_user_directory_temp_build", _make_staging_area
+        )
+        yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos})
+
+        yield self._end_background_update("populate_user_directory_createtables")
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
+    def _populate_user_directory_cleanup(self, progress, batch_size):
+        """
+        Update the user directory stream position, then clean up the old tables.
+        """
+        position = yield self._simple_select_one_onecol(
+            TEMP_TABLE + "_position", None, "position"
+        )
+        yield self.update_user_directory_stream_pos(position)
+
+        def _delete_staging_area(txn):
+            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
+            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users")
+            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")
+
+        yield self.runInteraction(
+            "populate_user_directory_cleanup", _delete_staging_area
+        )
+
+        yield self._end_background_update("populate_user_directory_cleanup")
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
+    def _populate_user_directory_process_rooms(self, progress, batch_size):
+
+        state = self.hs.get_state_handler()
+
+        # If we don't have progress filed, delete everything.
+        if not progress:
+            yield self.delete_all_from_user_dir()
+
+        def _get_next_batch(txn):
+            sql = """
+                SELECT room_id FROM %s
+                ORDER BY events DESC
+                LIMIT %s
+            """ % (
+                TEMP_TABLE + "_rooms",
+                str(batch_size),
+            )
+            txn.execute(sql)
+            rooms_to_work_on = txn.fetchall()
+
+            if not rooms_to_work_on:
+                return None
+
+            rooms_to_work_on = [x[0] for x in rooms_to_work_on]
+
+            # Get how many are left to process, so we can give status on how
+            # far we are in processing
+            txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
+            progress["remaining"] = txn.fetchone()[0]
+
+            return rooms_to_work_on
+
+        rooms_to_work_on = yield self.runInteraction(
+            "populate_user_directory_temp_read", _get_next_batch
+        )
+
+        # No more rooms -- complete the transaction.
+        if not rooms_to_work_on:
+            yield self._end_background_update("populate_user_directory_process_rooms")
+            defer.returnValue(1)
+
+        logger.info(
+            "Processing the next %d rooms of %d remaining"
+            % (len(rooms_to_work_on), progress["remaining"])
+        )
+
+        for room_id in rooms_to_work_on:
+            is_in_room = yield self.is_host_joined(room_id, self.server_name)
+
+            if is_in_room:
+                is_public = yield self.is_room_world_readable_or_publicly_joinable(
+                    room_id
+                )
+
+                users_with_profile = yield state.get_current_user_in_room(room_id)
+                user_ids = set(users_with_profile)
+
+                # Update each user in the user directory.
+                for user_id, profile in users_with_profile.items():
+                    yield self.update_profile_in_user_dir(
+                        user_id, profile.display_name, profile.avatar_url
+                    )
+
+                to_insert = set()
+
+                if is_public:
+                    for user_id in user_ids:
+                        if self.get_if_app_services_interested_in_user(user_id):
+                            continue
+
+                        to_insert.add(user_id)
+
+                    if to_insert:
+                        yield self.add_users_in_public_rooms(room_id, to_insert)
+                        to_insert.clear()
+                else:
+                    for user_id in user_ids:
+                        if not self.hs.is_mine_id(user_id):
+                            continue
+
+                        if self.get_if_app_services_interested_in_user(user_id):
+                            continue
+
+                        for other_user_id in user_ids:
+                            if user_id == other_user_id:
+                                continue
+
+                            user_set = (user_id, other_user_id)
+                            to_insert.add(user_set)
+
+                    if to_insert:
+                        yield self.add_users_who_share_private_room(room_id, to_insert)
+                        to_insert.clear()
+
+            # We've finished a room. Delete it from the table.
+            yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id})
+            # Update the remaining counter.
+            progress["remaining"] -= 1
+            yield self.runInteraction(
+                "populate_user_directory",
+                self._background_update_progress_txn,
+                "populate_user_directory_process_rooms",
+                progress,
+            )
+
+        defer.returnValue(len(rooms_to_work_on))
+
+    @defer.inlineCallbacks
+    def _populate_user_directory_process_users(self, progress, batch_size):
+        """
+        If search_all_users is enabled, add all of the users to the user directory.
+        """
+        if not self.hs.config.user_directory_search_all_users:
+            yield self._end_background_update("populate_user_directory_process_users")
+            defer.returnValue(1)
+
+        def _get_next_batch(txn):
+            sql = "SELECT user_id FROM %s LIMIT %s" % (
+                TEMP_TABLE + "_users",
+                str(batch_size),
+            )
+            txn.execute(sql)
+            users_to_work_on = txn.fetchall()
+
+            if not users_to_work_on:
+                return None
+
+            users_to_work_on = [x[0] for x in users_to_work_on]
+
+            # Get how many are left to process, so we can give status on how
+            # far we are in processing
+            sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
+            txn.execute(sql)
+            progress["remaining"] = txn.fetchone()[0]
+
+            return users_to_work_on
+
+        users_to_work_on = yield self.runInteraction(
+            "populate_user_directory_temp_read", _get_next_batch
+        )
+
+        # No more users -- complete the transaction.
+        if not users_to_work_on:
+            yield self._end_background_update("populate_user_directory_process_users")
+            defer.returnValue(1)
+
+        logger.info(
+            "Processing the next %d users of %d remaining"
+            % (len(users_to_work_on), progress["remaining"])
+        )
+
+        for user_id in users_to_work_on:
+            profile = yield self.get_profileinfo(get_localpart_from_id(user_id))
+            yield self.update_profile_in_user_dir(
+                user_id, profile.display_name, profile.avatar_url
+            )
+
+            # We've finished processing a user. Delete it from the table.
+            yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id})
+            # Update the remaining counter.
+            progress["remaining"] -= 1
+            yield self.runInteraction(
+                "populate_user_directory",
+                self._background_update_progress_txn,
+                "populate_user_directory_process_users",
+                progress,
+            )
+
+        defer.returnValue(len(users_to_work_on))
+
     @defer.inlineCallbacks
     def is_room_world_readable_or_publicly_joinable(self, room_id):
         """Check if the room is either world_readable or publically joinable
@@ -62,89 +329,16 @@ class UserDirectoryStore(SQLBaseStore):
 
         defer.returnValue(False)
 
-    def add_profiles_to_user_dir(self, users_with_profile):
-        """Add profiles to the user directory
-
-        Args:
-            users_with_profile (dict): Users to add to directory in the form of
-                mapping of user_id -> ProfileInfo
+    def update_profile_in_user_dir(self, user_id, display_name, avatar_url):
+        """
+        Update or add a user's profile in the user directory.
         """
 
-        if isinstance(self.database_engine, PostgresEngine):
-            # We weight the loclpart most highly, then display name and finally
-            # server name
-            sql = """
-                INSERT INTO user_directory_search(user_id, vector)
-                VALUES (?,
-                    setweight(to_tsvector('english', ?), 'A')
-                    || setweight(to_tsvector('english', ?), 'D')
-                    || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
-                )
-            """
-            args = (
-                (
-                    user_id,
-                    get_localpart_from_id(user_id),
-                    get_domain_from_id(user_id),
-                    profile.display_name,
-                )
-                for user_id, profile in iteritems(users_with_profile)
-            )
-        elif isinstance(self.database_engine, Sqlite3Engine):
-            sql = """
-                INSERT INTO user_directory_search(user_id, value)
-                VALUES (?,?)
-            """
-            args = tuple(
-                (
-                    user_id,
-                    "%s %s" % (user_id, p.display_name) if p.display_name else user_id,
-                )
-                for user_id, p in iteritems(users_with_profile)
-            )
-        else:
-            # This should be unreachable.
-            raise Exception("Unrecognized database engine")
-
-        def _add_profiles_to_user_dir_txn(txn):
-            txn.executemany(sql, args)
-            self._simple_insert_many_txn(
-                txn,
-                table="user_directory",
-                values=[
-                    {
-                        "user_id": user_id,
-                        "room_id": None,
-                        "display_name": profile.display_name,
-                        "avatar_url": profile.avatar_url,
-                    }
-                    for user_id, profile in iteritems(users_with_profile)
-                ],
-            )
-            for user_id in users_with_profile:
-                txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
-
-        return self.runInteraction(
-            "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn
-        )
-
-    @defer.inlineCallbacks
-    def update_user_in_user_dir(self, user_id, room_id):
-        yield self._simple_update_one(
-            table="user_directory",
-            keyvalues={"user_id": user_id},
-            updatevalues={"room_id": room_id},
-            desc="update_user_in_user_dir",
-        )
-        self.get_user_in_directory.invalidate((user_id,))
-
-    def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id):
         def _update_profile_in_user_dir_txn(txn):
             new_entry = self._simple_upsert_txn(
                 txn,
                 table="user_directory",
                 keyvalues={"user_id": user_id},
-                insertion_values={"room_id": room_id},
                 values={"display_name": display_name, "avatar_url": avatar_url},
                 lock=False,  # We're only inserter
             )
@@ -282,18 +476,6 @@ class UserDirectoryStore(SQLBaseStore):
         defer.returnValue(user_ids)
 
     @defer.inlineCallbacks
-    def get_all_rooms(self):
-        """Get all room_ids we've ever known about, in ascending order of "size"
-        """
-        sql = """
-            SELECT room_id FROM current_state_events
-            GROUP BY room_id
-            ORDER BY count(*) ASC
-        """
-        rows = yield self._execute("get_all_rooms", None, sql)
-        defer.returnValue([room_id for room_id, in rows])
-
-    @defer.inlineCallbacks
     def get_all_local_users(self):
         """Get all local users
         """
@@ -553,8 +735,8 @@ class UserDirectoryStore(SQLBaseStore):
         """
 
         if self.hs.config.user_directory_search_all_users:
-            join_args = ()
-            where_clause = "1=1"
+            join_args = (user_id,)
+            where_clause = "user_id != ?"
         else:
             join_args = (user_id,)
             where_clause = """