16 files changed, 240 insertions, 18 deletions
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 3ea18eefcb..d8b4a846d8 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -72,3 +72,6 @@ Jason Robinson <jasonr at matrix.org>
 
 Joseph Weston <joseph at weston.cloud>
  + Add admin API for querying HS version
+
+Benjamin Saunders <ben.e.saunders at gmail dot com>
+ * Documentation improvements
diff --git a/README.rst b/README.rst
index 5409f0c563..13e11a5773 100644
--- a/README.rst
+++ b/README.rst
@@ -340,8 +340,11 @@ log lines and looking for any 'Processed request' lines which take more than
 a few seconds to execute. Please let us know at #synapse:matrix.org if
 you see this failure mode so we can help debug it, however.
 
-Help!! Synapse eats all my RAM!
--------------------------------
+Help!! Synapse is slow and eats all my RAM/CPU!
+-----------------------------------------------
+
+First, ensure you are running the latest version of Synapse, using Python 3
+with a PostgreSQL database.
 
 Synapse's architecture is quite RAM hungry currently - we deliberately
 cache a lot of recent room data and metadata in RAM in order to speed up
@@ -352,14 +355,29 @@ variable. The default is 0.5, which can be decreased to reduce RAM usage
 in memory constrained enviroments, or increased if performance starts to
 degrade.
 
+However, degraded performance due to a low cache factor, common on
+machines with slow disks, often leads to explosions in memory use due
+backlogged requests. In this case, reducing the cache factor will make
+things worse. Instead, try increasing it drastically. 2.0 is a good
+starting value.
+
 Using `libjemalloc <http://jemalloc.net/>`_ can also yield a significant
-improvement in overall amount, and especially in terms of giving back RAM
-to the OS. To use it, the library must simply be put in the LD_PRELOAD
-environment variable when launching Synapse. On Debian, this can be done
-by installing the ``libjemalloc1`` package and adding this line to
-``/etc/default/matrix-synapse``::
+improvement in overall memory use, and especially in terms of giving back
+RAM to the OS. To use it, the library must simply be put in the
+LD_PRELOAD environment variable when launching Synapse. On Debian, this
+can be done by installing the ``libjemalloc1`` package and adding this
+line to ``/etc/default/matrix-synapse``::
 
     LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.1
 
 This can make a significant difference on Python 2.7 - it's unclear how
 much of an improvement it provides on Python 3.x.
+
+If you're encountering high CPU use by the Synapse process itself, you
+may be affected by a bug with presence tracking that leads to a
+massive excess of outgoing federation requests (see `discussion
+<https://github.com/matrix-org/synapse/issues/3971>`_). If metrics
+indicate that your server is also issuing far more outgoing federation
+requests than can be accounted for by your users' activity, this is a
+likely cause. The misbehavior can be worked around by setting
+``use_presence: false`` in the Synapse config file.
diff --git a/changelog.d/4276.misc b/changelog.d/4276.misc
new file mode 100644
index 0000000000..285939a4b8
--- /dev/null
+++ b/changelog.d/4276.misc
@@ -0,0 +1 @@
+Improve README section on performance troubleshooting.
diff --git a/changelog.d/5015.misc b/changelog.d/5015.misc
new file mode 100644
index 0000000000..eeec85b92c
--- /dev/null
+++ b/changelog.d/5015.misc
@@ -0,0 +1 @@
+Add logging to 3pid invite signature verification.
diff --git a/changelog.d/5480.misc b/changelog.d/5480.misc
new file mode 100644
index 0000000000..3001bcc1fe
--- /dev/null
+++ b/changelog.d/5480.misc
@@ -0,0 +1 @@
+Add an EXPERIMENTAL config option to try and periodically clean up extremities by sending dummy events.
diff --git a/changelog.d/5490.bugfix b/changelog.d/5490.bugfix
new file mode 100644
index 0000000000..4242254c53
--- /dev/null
+++ b/changelog.d/5490.bugfix
@@ -0,0 +1 @@
+Fix failure to start under docker with SAML support enabled.
\ No newline at end of file
diff --git a/changelog.d/5493.misc b/changelog.d/5493.misc
new file mode 100644
index 0000000000..365e49d634
--- /dev/null
+++ b/changelog.d/5493.misc
@@ -0,0 +1 @@
+Track deactivated accounts in the database.
diff --git a/docker/Dockerfile b/docker/Dockerfile
index c35da67a2a..24921eb098 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -57,6 +57,7 @@ RUN pip install --prefix="/install" --no-warn-script-location \
 
 FROM docker.io/python:${PYTHON_VERSION}-alpine3.8
 
+# xmlsec is required for saml support
 RUN apk add --no-cache --virtual .runtime_deps \
         libffi \
         libjpeg-turbo \
@@ -64,7 +65,8 @@ RUN apk add --no-cache --virtual .runtime_deps \
         libxslt \
         libpq \
         zlib \
-        su-exec
+        su-exec \
+        xmlsec
 
 COPY --from=builder /install /usr/local
 COPY ./docker/start.py /start.py
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 7d56e2d141..6e5b46e6c3 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -317,6 +317,12 @@ class ServerConfig(Config):
 
         _check_resource_config(self.listeners)
 
+        # An experimental option to try and periodically clean up extremities
+        # by sending dummy events.
+        self.cleanup_extremities_with_dummy_events = config.get(
+            "cleanup_extremities_with_dummy_events", False,
+        )
+
     def has_tls_listener(self):
         return any(l["tls"] for l in self.listeners)
 
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 1edd19cc13..7154bcbea6 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -92,6 +92,18 @@ class _EventInternalMetadata(object):
         """
         return getattr(self, "soft_failed", False)
 
+    def should_proactively_send(self):
+        """Whether the event, if ours, should be sent to other clients and
+        servers.
+
+        This is used for sending dummy events internally. Servers and clients
+        can still explicitly fetch the event.
+
+        Returns:
+            bool
+        """
+        return getattr(self, "proactively_send", True)
+
 
 def _event_dict_property(key):
     # We want to be able to use hasattr with the event dict properties.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 4f0f939102..4224b29ecf 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -168,6 +168,9 @@ class FederationSender(object):
                     if not is_mine and send_on_behalf_of is None:
                         return
 
+                    if not event.internal_metadata.should_proactively_send():
+                        return
+
                     try:
                         # Get the state from before the event.
                         # We need to make sure that this is the state from before
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 09af6a41a0..d5a605d3bd 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2754,25 +2754,55 @@ class FederationHandler(BaseHandler):
         if not invite_event:
             raise AuthError(403, "Could not find invite")
 
+        logger.debug("Checking auth on event %r", event.content)
+
         last_exception = None
+        # for each public key in the 3pid invite event
         for public_key_object in self.hs.get_auth().get_public_keys(invite_event):
             try:
+                # for each sig on the third_party_invite block of the actual invite
                 for server, signature_block in signed["signatures"].items():
                     for key_name, encoded_signature in signature_block.items():
                         if not key_name.startswith("ed25519:"):
                             continue
 
-                        public_key = public_key_object["public_key"]
-                        verify_key = decode_verify_key_bytes(
-                            key_name,
-                            decode_base64(public_key)
+                        logger.debug(
+                            "Attempting to verify sig with key %s from %r "
+                            "against pubkey %r",
+                            key_name, server, public_key_object,
                         )
-                        verify_signed_json(signed, server, verify_key)
-                        if "key_validity_url" in public_key_object:
-                            yield self._check_key_revocation(
-                                public_key,
+
+                        try:
+                            public_key = public_key_object["public_key"]
+                            verify_key = decode_verify_key_bytes(
+                                key_name,
+                                decode_base64(public_key)
+                            )
+                            verify_signed_json(signed, server, verify_key)
+                            logger.debug(
+                                "Successfully verified sig with key %s from %r "
+                                "against pubkey %r",
+                                key_name, server, public_key_object,
+                            )
+                        except Exception:
+                            logger.info(
+                                "Failed to verify sig with key %s from %r "
+                                "against pubkey %r",
+                                key_name, server, public_key_object,
+                            )
+                            raise
+                        try:
+                            if "key_validity_url" in public_key_object:
+                                yield self._check_key_revocation(
+                                    public_key,
+                                    public_key_object["key_validity_url"]
+                                )
+                        except Exception:
+                            logger.info(
+                                "Failed to query key_validity_url %s",
                                 public_key_object["key_validity_url"]
                             )
+                            raise
                         return
             except Exception as e:
                 last_exception = e
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 11650dc80c..7728ea230d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -34,9 +34,10 @@ from synapse.api.errors import (
 from synapse.api.room_versions import RoomVersions
 from synapse.api.urls import ConsentURIBuilder
 from synapse.events.validator import EventValidator
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.send_event import ReplicationSendEventRestServlet
 from synapse.storage.state import StateFilter
-from synapse.types import RoomAlias, UserID
+from synapse.types import RoomAlias, UserID, create_requester
 from synapse.util.async_helpers import Linearizer
 from synapse.util.frozenutils import frozendict_json_encoder
 from synapse.util.logcontext import run_in_background
@@ -261,6 +262,18 @@ class EventCreationHandler(object):
         if self._block_events_without_consent_error:
             self._consent_uri_builder = ConsentURIBuilder(self.config)
 
+        if (
+            not self.config.worker_app
+            and self.config.cleanup_extremities_with_dummy_events
+        ):
+            self.clock.looping_call(
+                lambda: run_as_background_process(
+                    "send_dummy_events_to_fill_extremities",
+                    self._send_dummy_events_to_fill_extremities
+                ),
+                5 * 60 * 1000,
+            )
+
     @defer.inlineCallbacks
     def create_event(self, requester, event_dict, token_id=None, txn_id=None,
                      prev_events_and_hashes=None, require_consent=True):
@@ -874,3 +887,63 @@ class EventCreationHandler(object):
             yield presence.bump_presence_active_time(user)
         except Exception:
             logger.exception("Error bumping presence active time")
+
+    @defer.inlineCallbacks
+    def _send_dummy_events_to_fill_extremities(self):
+        """Background task to send dummy events into rooms that have a large
+        number of extremities
+        """
+
+        room_ids = yield self.store.get_rooms_with_many_extremities(
+            min_count=10, limit=5,
+        )
+
+        for room_id in room_ids:
+            # For each room we need to find a joined member we can use to send
+            # the dummy event with.
+
+            prev_events_and_hashes = yield self.store.get_prev_events_for_room(
+                room_id,
+            )
+
+            latest_event_ids = (
+                event_id for (event_id, _, _) in prev_events_and_hashes
+            )
+
+            members = yield self.state.get_current_users_in_room(
+                room_id, latest_event_ids=latest_event_ids,
+            )
+
+            user_id = None
+            for member in members:
+                if self.hs.is_mine_id(member):
+                    user_id = member
+                    break
+
+            if not user_id:
+                # We don't have a joined user.
+                # TODO: We should do something here to stop the room from
+                # appearing next time.
+                continue
+
+            requester = create_requester(user_id)
+
+            event, context = yield self.create_event(
+                requester,
+                {
+                    "type": "org.matrix.dummy_event",
+                    "content": {},
+                    "room_id": room_id,
+                    "sender": user_id,
+                },
+                prev_events_and_hashes=prev_events_and_hashes,
+            )
+
+            event.internal_metadata.proactively_send = False
+
+            yield self.send_nonmember_event(
+                requester,
+                event,
+                context,
+                ratelimit=False,
+            )
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 09e39c2c28..e8d16edbc8 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -190,6 +190,35 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             room_id,
         )
 
+    def get_rooms_with_many_extremities(self, min_count, limit):
+        """Get the top rooms with at least N extremities.
+
+        Args:
+            min_count (int): The minimum number of extremities
+            limit (int): The maximum number of rooms to return.
+
+        Returns:
+            Deferred[list]: At most `limit` room IDs that have at least
+            `min_count` extremities, sorted by extremity count.
+        """
+
+        def _get_rooms_with_many_extremities_txn(txn):
+            sql = """
+                SELECT room_id FROM event_forward_extremities
+                GROUP BY room_id
+                HAVING count(*) > ?
+                ORDER BY count(*) DESC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (min_count, limit))
+            return [room_id for room_id, in txn]
+
+        return self.runInteraction(
+            "get_rooms_with_many_extremities",
+            _get_rooms_with_many_extremities_txn,
+        )
+
     @cached(max_entries=5000, iterable=True)
     def get_latest_event_ids_in_room(self, room_id):
         return self._simple_select_onecol(
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index d36917e4d6..0b3c656e90 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -662,7 +662,7 @@ class RegistrationStore(
 
             for user in rows:
                 if not user["count_tokens"] and not user["count_threepids"]:
-                    self.set_user_deactivated_status_txn(txn, user["user_id"], True)
+                    self.set_user_deactivated_status_txn(txn, user["name"], True)
                     rows_processed_nb += 1
 
             logger.info("Marked %d rows as deactivated", rows_processed_nb)
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index f4c81ef77d..e9e2d5337c 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -222,3 +222,44 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
         self.assertEqual(set(latest_event_ids), set([event_id_b, event_id_c]))
+
+
+class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
+    def make_homeserver(self, reactor, clock):
+        config = self.default_config()
+        config["cleanup_extremities_with_dummy_events"] = True
+        return self.setup_test_homeserver(config=config)
+
+    def prepare(self, reactor, clock, homeserver):
+        self.store = homeserver.get_datastore()
+        self.room_creator = homeserver.get_room_creation_handler()
+
+        # Create a test user and room
+        self.user = UserID("alice", "test")
+        self.requester = Requester(self.user, None, False, None, None)
+        info = self.get_success(self.room_creator.create_room(self.requester, {}))
+        self.room_id = info["room_id"]
+
+    def test_send_dummy_event(self):
+        # Create a bushy graph with 50 extremities.
+
+        event_id_start = self.create_and_send_event(self.room_id, self.user)
+
+        for _ in range(50):
+            self.create_and_send_event(
+                self.room_id, self.user, prev_event_ids=[event_id_start]
+            )
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(len(latest_event_ids), 50)
+
+        # Pump the reactor repeatedly so that the background updates have a
+        # chance to run.
+        self.pump(10 * 60)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))