summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/6615.misc1
-rw-r--r--changelog.d/6941.removal1
-rw-r--r--changelog.d/6952.misc1
-rw-r--r--changelog.d/6953.misc1
-rw-r--r--changelog.d/6954.misc1
-rw-r--r--changelog.d/6956.misc1
-rw-r--r--synapse/app/_base.py9
-rw-r--r--synapse/handlers/directory.py19
-rw-r--r--synapse/handlers/room.py36
-rw-r--r--synapse/state/__init__.py15
-rw-r--r--synapse/state/v2.py2
-rw-r--r--synapse/storage/data_stores/main/end_to_end_keys.py33
-rw-r--r--synapse/storage/data_stores/main/event_federation.py38
-rw-r--r--synapse/storage/data_stores/main/schema/full_schemas/README.md24
-rw-r--r--synapse/storage/database.py2
-rw-r--r--tests/state/test_v2.py6
16 files changed, 107 insertions, 83 deletions
diff --git a/changelog.d/6615.misc b/changelog.d/6615.misc
new file mode 100644
index 0000000000..9f93152565
--- /dev/null
+++ b/changelog.d/6615.misc
@@ -0,0 +1 @@
+Add some clarifications to `README.md` in the database schema directory.
diff --git a/changelog.d/6941.removal b/changelog.d/6941.removal
new file mode 100644
index 0000000000..8573be84b3
--- /dev/null
+++ b/changelog.d/6941.removal
@@ -0,0 +1 @@
+Stop sending m.room.aliases events during room creation and upgrade.
diff --git a/changelog.d/6952.misc b/changelog.d/6952.misc
new file mode 100644
index 0000000000..e26dc5cab8
--- /dev/null
+++ b/changelog.d/6952.misc
@@ -0,0 +1 @@
+Improve perf of v2 state res for large rooms.
diff --git a/changelog.d/6953.misc b/changelog.d/6953.misc
new file mode 100644
index 0000000000..0ab52041cf
--- /dev/null
+++ b/changelog.d/6953.misc
@@ -0,0 +1 @@
+Reduce time spent doing GC by freezing objects on startup.
diff --git a/changelog.d/6954.misc b/changelog.d/6954.misc
new file mode 100644
index 0000000000..8b84ce2f19
--- /dev/null
+++ b/changelog.d/6954.misc
@@ -0,0 +1 @@
+Minor perf fixes to `get_auth_chain_ids`.
diff --git a/changelog.d/6956.misc b/changelog.d/6956.misc
new file mode 100644
index 0000000000..5cb0894182
--- /dev/null
+++ b/changelog.d/6956.misc
@@ -0,0 +1 @@
+Don't record remote cross-signing keys in the `devices` table.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 0e8b467a3e..109b1e2fb5 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -279,6 +279,15 @@ def start(hs, listeners=None):
 
         setup_sentry(hs)
         setup_sdnotify(hs)
+
+        # We now freeze all allocated objects in the hopes that (almost)
+        # everything currently allocated are things that will be used for the
+        # rest of time. Doing so means less work each GC (hopefully).
+        #
+        # This only works on Python 3.7
+        if sys.version_info >= (3, 7):
+            gc.collect()
+            gc.freeze()
     except Exception:
         traceback.print_exc(file=sys.stderr)
         reactor = hs.get_reactor()
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index db2104c5f6..921d887b24 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 
+import collections
 import logging
 import string
 from typing import List
@@ -283,22 +284,6 @@ class DirectoryHandler(BaseHandler):
             )
 
     @defer.inlineCallbacks
-    def send_room_alias_update_event(self, requester, room_id):
-        aliases = yield self.store.get_aliases_for_room(room_id)
-
-        yield self.event_creation_handler.create_and_send_nonmember_event(
-            requester,
-            {
-                "type": EventTypes.Aliases,
-                "state_key": self.hs.hostname,
-                "room_id": room_id,
-                "sender": requester.user.to_string(),
-                "content": {"aliases": aliases},
-            },
-            ratelimit=False,
-        )
-
-    @defer.inlineCallbacks
     def _update_canonical_alias(self, requester, user_id, room_id, room_alias):
         """
         Send an updated canonical alias event if the removed alias was set as
@@ -326,7 +311,7 @@ class DirectoryHandler(BaseHandler):
         alt_aliases = content.pop("alt_aliases", None)
         # If the aliases are not a list (or not found) do not attempt to modify
         # the list.
-        if isinstance(alt_aliases, list):
+        if isinstance(alt_aliases, collections.Sequence):
             send_update = True
             alt_aliases = [alias for alias in alt_aliases if alias != alias_str]
             if alt_aliases:
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 49ec2f48bc..76e8f61b74 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -149,7 +149,9 @@ class RoomCreationHandler(BaseHandler):
         return ret
 
     @defer.inlineCallbacks
-    def _upgrade_room(self, requester, old_room_id, new_version):
+    def _upgrade_room(
+        self, requester: Requester, old_room_id: str, new_version: RoomVersion
+    ):
         user_id = requester.user.to_string()
 
         # start by allocating a new room id
@@ -448,19 +450,21 @@ class RoomCreationHandler(BaseHandler):
 
     @defer.inlineCallbacks
     def _move_aliases_to_new_room(
-        self, requester, old_room_id, new_room_id, old_room_state
+        self,
+        requester: Requester,
+        old_room_id: str,
+        new_room_id: str,
+        old_room_state: StateMap[str],
     ):
         directory_handler = self.hs.get_handlers().directory_handler
 
         aliases = yield self.store.get_aliases_for_room(old_room_id)
 
         # check to see if we have a canonical alias.
-        canonical_alias = None
+        canonical_alias_event = None
         canonical_alias_event_id = old_room_state.get((EventTypes.CanonicalAlias, ""))
         if canonical_alias_event_id:
             canonical_alias_event = yield self.store.get_event(canonical_alias_event_id)
-            if canonical_alias_event:
-                canonical_alias = canonical_alias_event.content.get("alias", "")
 
         # first we try to remove the aliases from the old room (we suppress sending
         # the room_aliases event until the end).
@@ -488,19 +492,6 @@ class RoomCreationHandler(BaseHandler):
         if not removed_aliases:
             return
 
-        try:
-            # this can fail if, for some reason, our user doesn't have perms to send
-            # m.room.aliases events in the old room (note that we've already checked that
-            # they have perms to send a tombstone event, so that's not terribly likely).
-            #
-            # If that happens, it's regrettable, but we should carry on: it's the same
-            # as when you remove an alias from the directory normally - it just means that
-            # the aliases event gets out of sync with the directory
-            # (cf https://github.com/vector-im/riot-web/issues/2369)
-            yield directory_handler.send_room_alias_update_event(requester, old_room_id)
-        except AuthError as e:
-            logger.warning("Failed to send updated alias event on old room: %s", e)
-
         # we can now add any aliases we successfully removed to the new room.
         for alias in removed_aliases:
             try:
@@ -517,8 +508,10 @@ class RoomCreationHandler(BaseHandler):
                 # checking module decides it shouldn't, or similar.
                 logger.error("Error adding alias %s to new room: %s", alias, e)
 
+        # If a canonical alias event existed for the old room, fire a canonical
+        # alias event for the new room with a copy of the information.
         try:
-            if canonical_alias and (canonical_alias in removed_aliases):
+            if canonical_alias_event:
                 yield self.event_creation_handler.create_and_send_nonmember_event(
                     requester,
                     {
@@ -526,12 +519,10 @@ class RoomCreationHandler(BaseHandler):
                         "state_key": "",
                         "room_id": new_room_id,
                         "sender": requester.user.to_string(),
-                        "content": {"alias": canonical_alias},
+                        "content": canonical_alias_event.content,
                     },
                     ratelimit=False,
                 )
-
-            yield directory_handler.send_room_alias_update_event(requester, new_room_id)
         except SynapseError as e:
             # again I'm not really expecting this to fail, but if it does, I'd rather
             # we returned the new room to the client at this point.
@@ -757,7 +748,6 @@ class RoomCreationHandler(BaseHandler):
 
         if room_alias:
             result["room_alias"] = room_alias.to_string()
-            yield directory_handler.send_room_alias_update_event(requester, room_id)
 
         return result
 
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index fdd6bef6b4..df7a4f6a89 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -16,7 +16,7 @@
 
 import logging
 from collections import namedtuple
-from typing import Dict, Iterable, List, Optional
+from typing import Dict, Iterable, List, Optional, Set
 
 from six import iteritems, itervalues
 
@@ -662,7 +662,7 @@ class StateResolutionStore(object):
             allow_rejected=allow_rejected,
         )
 
-    def get_auth_chain(self, event_ids):
+    def get_auth_chain(self, event_ids: List[str], ignore_events: Set[str]):
         """Gets the full auth chain for a set of events (including rejected
         events).
 
@@ -674,11 +674,16 @@ class StateResolutionStore(object):
                presence of rejected events
 
         Args:
-            event_ids (list): The event IDs of the events to fetch the auth
-                chain for. Must be state events.
+            event_ids: The event IDs of the events to fetch the auth chain for.
+                Must be state events.
+            ignore_events: Set of events to exclude from the returned auth
+                chain.
+
 
         Returns:
             Deferred[list[str]]: List of event IDs of the auth chain.
         """
 
-        return self.store.get_auth_chain_ids(event_ids, include_given=True)
+        return self.store.get_auth_chain_ids(
+            event_ids, include_given=True, ignore_events=ignore_events,
+        )
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 531018c6a5..75fe58305a 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -248,7 +248,7 @@ def _get_auth_chain_difference(state_sets, event_map, state_res_store):
             and eid not in common
         )
 
-        auth_chain = yield state_res_store.get_auth_chain(auth_ids)
+        auth_chain = yield state_res_store.get_auth_chain(auth_ids, common)
         auth_ids.update(auth_chain)
 
         auth_sets.append(auth_ids)
diff --git a/synapse/storage/data_stores/main/end_to_end_keys.py b/synapse/storage/data_stores/main/end_to_end_keys.py
index e551606f9d..001a53f9b4 100644
--- a/synapse/storage/data_stores/main/end_to_end_keys.py
+++ b/synapse/storage/data_stores/main/end_to_end_keys.py
@@ -680,11 +680,6 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
                 'user_signing' for a user-signing key
             key (dict): the key data
         """
-        # the cross-signing keys need to occupy the same namespace as devices,
-        # since signatures are identified by device ID.  So add an entry to the
-        # device table to make sure that we don't have a collision with device
-        # IDs
-
         # the 'key' dict will look something like:
         # {
         #   "user_id": "@alice:example.com",
@@ -701,16 +696,24 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
         # The "keys" property must only have one entry, which will be the public
         # key, so we just grab the first value in there
         pubkey = next(iter(key["keys"].values()))
-        self.db.simple_insert_txn(
-            txn,
-            "devices",
-            values={
-                "user_id": user_id,
-                "device_id": pubkey,
-                "display_name": key_type + " signing key",
-                "hidden": True,
-            },
-        )
+
+        # The cross-signing keys need to occupy the same namespace as devices,
+        # since signatures are identified by device ID.  So add an entry to the
+        # device table to make sure that we don't have a collision with device
+        # IDs.
+        # We only need to do this for local users, since remote servers should be
+        # responsible for checking this for their own users.
+        if self.hs.is_mine_id(user_id):
+            self.db.simple_insert_txn(
+                txn,
+                "devices",
+                values={
+                    "user_id": user_id,
+                    "device_id": pubkey,
+                    "display_name": key_type + " signing key",
+                    "hidden": True,
+                },
+            )
 
         # and finally, store the key itself
         with self._cross_signing_id_gen.get_next() as stream_id:
diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 60c67457b4..750ec1b70d 100644
--- a/synapse/storage/data_stores/main/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -14,8 +14,8 @@
 # limitations under the License.
 import itertools
 import logging
+from typing import List, Optional, Set
 
-from six.moves import range
 from six.moves.queue import Empty, PriorityQueue
 
 from twisted.internet import defer
@@ -27,6 +27,7 @@ from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
 from synapse.storage.data_stores.main.signatures import SignatureWorkerStore
 from synapse.storage.database import Database
 from synapse.util.caches.descriptors import cached
+from synapse.util.iterutils import batch_iter
 
 logger = logging.getLogger(__name__)
 
@@ -46,21 +47,37 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             event_ids, include_given=include_given
         ).addCallback(self.get_events_as_list)
 
-    def get_auth_chain_ids(self, event_ids, include_given=False):
+    def get_auth_chain_ids(
+        self,
+        event_ids: List[str],
+        include_given: bool = False,
+        ignore_events: Optional[Set[str]] = None,
+    ):
         """Get auth events for given event_ids. The events *must* be state events.
 
         Args:
-            event_ids (list): state events
-            include_given (bool): include the given events in result
+            event_ids: state events
+            include_given: include the given events in result
+            ignore_events: Set of events to exclude from the returned auth
+                chain. This is useful if the caller will just discard the
+                given events anyway, and saves us from figuring out their auth
+                chains if not required.
 
         Returns:
             list of event_ids
         """
         return self.db.runInteraction(
-            "get_auth_chain_ids", self._get_auth_chain_ids_txn, event_ids, include_given
+            "get_auth_chain_ids",
+            self._get_auth_chain_ids_txn,
+            event_ids,
+            include_given,
+            ignore_events,
         )
 
-    def _get_auth_chain_ids_txn(self, txn, event_ids, include_given):
+    def _get_auth_chain_ids_txn(self, txn, event_ids, include_given, ignore_events):
+        if ignore_events is None:
+            ignore_events = set()
+
         if include_given:
             results = set(event_ids)
         else:
@@ -71,15 +88,14 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         front = set(event_ids)
         while front:
             new_front = set()
-            front_list = list(front)
-            chunks = [front_list[x : x + 100] for x in range(0, len(front), 100)]
-            for chunk in chunks:
+            for chunk in batch_iter(front, 100):
                 clause, args = make_in_list_sql_clause(
                     txn.database_engine, "event_id", chunk
                 )
-                txn.execute(base_sql + clause, list(args))
-                new_front.update([r[0] for r in txn])
+                txn.execute(base_sql + clause, args)
+                new_front.update(r[0] for r in txn)
 
+            new_front -= ignore_events
             new_front -= results
 
             front = new_front
diff --git a/synapse/storage/data_stores/main/schema/full_schemas/README.md b/synapse/storage/data_stores/main/schema/full_schemas/README.md
index bbd3f18604..c00f287190 100644
--- a/synapse/storage/data_stores/main/schema/full_schemas/README.md
+++ b/synapse/storage/data_stores/main/schema/full_schemas/README.md
@@ -1,13 +1,21 @@
-# Building full schema dumps
+# Synapse Database Schemas
 
-These schemas need to be made from a database that has had all background updates run.
+These schemas are used as a basis to create brand new Synapse databases, on both
+SQLite3 and Postgres.
 
-To do so, use `scripts-dev/make_full_schema.sh`. This will produce
-`full.sql.postgres ` and `full.sql.sqlite` files.
+## Building full schema dumps
+
+If you want to recreate these schemas, they need to be made from a database that
+has had all background updates run.
+
+To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
+`full.sql.postgres ` and `full.sql.sqlite` files. 
 
 Ensure postgres is installed and your user has the ability to run bash commands
-such as `createdb`.
+such as `createdb`, then call
+
+    ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
 
-```
-./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
-```
+There are currently two folders with full-schema snapshots. `16` is a snapshot
+from 2015, for historical reference. The other contains the most recent full
+schema snapshot.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 3eeb2f7c04..6dcb5c04da 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1504,7 +1504,7 @@ class Database(object):
 
 def make_in_list_sql_clause(
     database_engine, column: str, iterable: Iterable
-) -> Tuple[str, Iterable]:
+) -> Tuple[str, list]:
     """Returns an SQL clause that checks the given column is in the iterable.
 
     On SQLite this expands to `column IN (?, ?, ...)`, whereas on Postgres
diff --git a/tests/state/test_v2.py b/tests/state/test_v2.py
index 5bafad9f19..5059ade850 100644
--- a/tests/state/test_v2.py
+++ b/tests/state/test_v2.py
@@ -603,7 +603,7 @@ class TestStateResolutionStore(object):
 
         return {eid: self.event_map[eid] for eid in event_ids if eid in self.event_map}
 
-    def get_auth_chain(self, event_ids):
+    def get_auth_chain(self, event_ids, ignore_events):
         """Gets the full auth chain for a set of events (including rejected
         events).
 
@@ -617,6 +617,8 @@ class TestStateResolutionStore(object):
         Args:
             event_ids (list): The event IDs of the events to fetch the auth
                 chain for. Must be state events.
+            ignore_events: Set of events to exclude from the returned auth
+                chain.
 
         Returns:
             Deferred[list[str]]: List of event IDs of the auth chain.
@@ -627,7 +629,7 @@ class TestStateResolutionStore(object):
         stack = list(event_ids)
         while stack:
             event_id = stack.pop()
-            if event_id in result:
+            if event_id in result or event_id in ignore_events:
                 continue
 
             result.add(event_id)