From 20c76cecb9eb84dadfa7b2d25b436d3ab9218a1a Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 30 Aug 2022 10:29:16 -0700 Subject: Drop unused column `application_services_state.last_txn` (#13627) --- synapse/storage/schema/__init__.py | 1 + ...pplication_services_state_last_txn.sql.postgres | 17 +++++++++ ..._application_services_state_last_txn.sql.sqlite | 40 ++++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres create mode 100644 synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index a9a88c8bfd..256f745dc0 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -75,6 +75,7 @@ Changes in SCHEMA_VERSION = 71: Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. - Tables related to groups are dropped. + - Unused column application_services_state.last_txn is dropped """ diff --git a/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres new file mode 100644 index 0000000000..13d47de9e6 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.postgres @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop unused column application_services_state.last_txn +ALTER table application_services_state DROP COLUMN last_txn; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite new file mode 100644 index 0000000000..3be1c88d72 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/04drop_column_application_services_state_last_txn.sql.sqlite @@ -0,0 +1,40 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop unused column application_services_state.last_txn + +CREATE TABLE application_services_state2 ( + as_id TEXT PRIMARY KEY NOT NULL, + state VARCHAR(5), + read_receipt_stream_id BIGINT, + presence_stream_id BIGINT, + to_device_stream_id BIGINT, + device_list_stream_id BIGINT +); + + +INSERT INTO application_services_state2 ( + as_id, + state, + read_receipt_stream_id, + presence_stream_id, + to_device_stream_id, + device_list_stream_id +) +SELECT as_id, state, read_receipt_stream_id, presence_stream_id, to_device_stream_id, device_list_stream_id +FROM application_services_state; + +DROP TABLE application_services_state; +ALTER TABLE application_services_state2 RENAME TO application_services_state; -- cgit 1.5.1 From 48a5c47a9f7379f3b7e85c5a8dcbae346eb22310 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 14:57:47 -0400 Subject: Add a schema delta to drop unstable private read receipts. (#13692) Otherwise they'll be leaked due to the filtering code only respecting the stable identifiers for private read receipts. --- changelog.d/13692.removal | 1 + .../72/05remove_unstable_private_read_receipts.sql | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 changelog.d/13692.removal create mode 100644 synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13692.removal b/changelog.d/13692.removal new file mode 100644 index 0000000000..eb075d4517 --- /dev/null +++ b/changelog.d/13692.removal @@ -0,0 +1 @@ +Remove support for unstable [private read receipts](https://github.com/matrix-org/matrix-spec-proposals/pull/2285). diff --git a/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql b/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql new file mode 100644 index 0000000000..36b41049cd --- /dev/null +++ b/synapse/storage/schema/main/delta/72/05remove_unstable_private_read_receipts.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop previously received private read receipts so they do not accidentally +-- get leaked to other users. +DELETE FROM receipts_linearized WHERE receipt_type = 'org.matrix.msc2285.read.private'; +DELETE FROM receipts_graph WHERE receipt_type = 'org.matrix.msc2285.read.private'; -- cgit 1.5.1 From f799eac7ea96f943ad1272a5a81f845dfa08a254 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:41:48 +0200 Subject: Add timestamp to user's consent (#13741) Co-authored-by: reivilibre --- changelog.d/13741.feature | 1 + docs/admin_api/user_admin_api.md | 2 ++ synapse/handlers/admin.py | 1 + synapse/storage/databases/main/registration.py | 6 +++- .../main/delta/72/06add_consent_ts_to_users.sql | 16 +++++++++++ tests/rest/admin/test_user.py | 1 + tests/storage/test_registration.py | 33 +++++++++++++++++----- 7 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13741.feature create mode 100644 synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13741.feature b/changelog.d/13741.feature new file mode 100644 index 0000000000..dff46f373f --- /dev/null +++ b/changelog.d/13741.feature @@ -0,0 +1 @@ +Document the timestamp when a user accepts the consent, if [consent tracking](https://matrix-org.github.io/synapse/latest/consent_tracking.html) is used. \ No newline at end of file diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index c1ca0c8a64..975f05c929 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -42,6 +42,7 @@ It returns a JSON body like the following: "appservice_id": null, "consent_server_notice_sent": null, "consent_version": null, + "consent_ts": null, "external_ids": [ { "auth_provider": "", @@ -364,6 +365,7 @@ The following actions are **NOT** performed. The list may be incomplete. - Remove the user's creation (registration) timestamp - [Remove rate limit overrides](#override-ratelimiting-for-users) - Remove from monthly active users +- Remove user's consent information (consent version and timestamp) ## Reset password diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index d4fe7df533..cf9f19608a 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -70,6 +70,7 @@ class AdminHandler: "appservice_id", "consent_server_notice_sent", "consent_version", + "consent_ts", "user_type", "is_guest", } diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 7fb9c801da..ac821878b0 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -175,6 +175,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): "is_guest", "admin", "consent_version", + "consent_ts", "consent_server_notice_sent", "appservice_id", "creation_ts", @@ -2227,7 +2228,10 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): txn, table="users", keyvalues={"name": user_id}, - updatevalues={"consent_version": consent_version}, + updatevalues={ + "consent_version": consent_version, + "consent_ts": self._clock.time_msec(), + }, ) self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) diff --git a/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql b/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql new file mode 100644 index 0000000000..609eb1750f --- /dev/null +++ b/synapse/storage/schema/main/delta/72/06add_consent_ts_to_users.sql @@ -0,0 +1,16 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE users ADD consent_ts bigint; diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 1afd082707..ec5ccf6fca 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2580,6 +2580,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertIn("appservice_id", content) self.assertIn("consent_server_notice_sent", content) self.assertIn("consent_version", content) + self.assertIn("consent_ts", content) self.assertIn("external_ids", content) # This key was removed intentionally. Ensure it is not accidentally re-included. diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index a49ac1525e..853a93afab 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -11,15 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import UserTypes from synapse.api.errors import ThreepidValidationError +from synapse.server import HomeServer +from synapse.util import Clock from tests.unittest import HomeserverTestCase class RegistrationStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.user_id = "@my-user:test" @@ -27,7 +30,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): self.pwhash = "{xx1}123456789" self.device_id = "akgjhdjklgshg" - def test_register(self): + def test_register(self) -> None: self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.assertEqual( @@ -38,6 +41,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): "admin": 0, "is_guest": 0, "consent_version": None, + "consent_ts": None, "consent_server_notice_sent": None, "appservice_id": None, "creation_ts": 0, @@ -48,7 +52,20 @@ class RegistrationStoreTestCase(HomeserverTestCase): (self.get_success(self.store.get_user_by_id(self.user_id))), ) - def test_add_tokens(self): + def test_consent(self) -> None: + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + before_consent = self.clock.time_msec() + self.reactor.advance(5) + self.get_success(self.store.user_set_consent_version(self.user_id, "1")) + self.reactor.advance(5) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user + self.assertEqual(user["consent_version"], "1") + self.assertGreater(user["consent_ts"], before_consent) + self.assertLess(user["consent_ts"], self.clock.time_msec()) + + def test_add_tokens(self) -> None: self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.get_success( self.store.add_access_token_to_user( @@ -58,11 +75,12 @@ class RegistrationStoreTestCase(HomeserverTestCase): result = self.get_success(self.store.get_user_by_access_token(self.tokens[1])) + assert result self.assertEqual(result.user_id, self.user_id) self.assertEqual(result.device_id, self.device_id) self.assertIsNotNone(result.token_id) - def test_user_delete_access_tokens(self): + def test_user_delete_access_tokens(self) -> None: # add some tokens self.get_success(self.store.register_user(self.user_id, self.pwhash)) self.get_success( @@ -87,6 +105,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): # check the one not associated with the device was not deleted user = self.get_success(self.store.get_user_by_access_token(self.tokens[0])) + assert user self.assertEqual(self.user_id, user.user_id) # now delete the rest @@ -95,11 +114,11 @@ class RegistrationStoreTestCase(HomeserverTestCase): user = self.get_success(self.store.get_user_by_access_token(self.tokens[0])) self.assertIsNone(user, "access token was not deleted without device_id") - def test_is_support_user(self): + def test_is_support_user(self) -> None: TEST_USER = "@test:test" SUPPORT_USER = "@support:test" - res = self.get_success(self.store.is_support_user(None)) + res = self.get_success(self.store.is_support_user(None)) # type: ignore[arg-type] self.assertFalse(res) self.get_success( self.store.register_user(user_id=TEST_USER, password_hash=None) @@ -115,7 +134,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): res = self.get_success(self.store.is_support_user(SUPPORT_USER)) self.assertTrue(res) - def test_3pid_inhibit_invalid_validation_session_error(self): + def test_3pid_inhibit_invalid_validation_session_error(self) -> None: """Tests that enabling the configuration option to inhibit 3PID errors on /requestToken also inhibits validation errors caused by an unknown session ID. """ -- cgit 1.5.1 From da41a7cd618d11b05c2c04c39068fd4b1e1b7894 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Mon, 12 Sep 2022 12:58:33 +0100 Subject: Remove check current state membership up to date (#13745) * Remove checks for membership column in current_state_events * Add schema script to force through the `current_state_events_membership` background job Contributed by Nick @ Beeper (@fizzadar). --- changelog.d/13745.misc | 1 + synapse/storage/databases/main/roommember.py | 202 +++++---------------- ...force_update_current_state_events_membership.py | 52 ++++++ 3 files changed, 100 insertions(+), 155 deletions(-) create mode 100644 changelog.d/13745.misc create mode 100644 synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py (limited to 'synapse/storage/schema') diff --git a/changelog.d/13745.misc b/changelog.d/13745.misc new file mode 100644 index 0000000000..e97a789c0e --- /dev/null +++ b/changelog.d/13745.misc @@ -0,0 +1 @@ +Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 6e1ff5626b..fdb4684e12 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -32,10 +32,7 @@ import attr from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge -from synapse.metrics.background_process_metrics import ( - run_as_background_process, - wrap_as_background_process, -) +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): # at a time. Keyed by room_id. self._joined_host_linearizer = Linearizer("_JoinedHostsCache") - # Is the current_state_events.membership up to date? Or is the - # background update still running? - self._current_state_events_membership_up_to_date = False - - txn = db_conn.cursor( - txn_name="_check_safe_current_state_events_membership_updated" - ) - self._check_safe_current_state_events_membership_updated_txn(txn) - txn.close() - if ( self.hs.config.worker.run_background_tasks and self.hs.config.metrics.metrics_flags.known_servers @@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._known_servers_count = max([count, 1]) return self._known_servers_count - def _check_safe_current_state_events_membership_updated_txn( - self, txn: LoggingTransaction - ) -> None: - """Checks if it is safe to assume the new current_state_events - membership column is up to date - """ - - pending_update = self.db_pool.simple_select_one_txn( - txn, - table="background_updates", - keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME}, - retcols=["update_name"], - allow_none=True, - ) - - self._current_state_events_membership_up_to_date = not pending_update - - # If the update is still running, reschedule to run. - if pending_update: - self._clock.call_later( - 15.0, - run_as_background_process, - "_check_safe_current_state_events_membership_updated", - self.db_pool.runInteraction, - "_check_safe_current_state_events_membership_updated", - self._check_safe_current_state_events_membership_updated_txn, - ) - @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: """ @@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): `get_current_hosts_in_room()` and so we can re-use the cache but it's not horrible to have here either. """ - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT c.state_key FROM current_state_events as c - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ - else: - sql = """ - SELECT c.state_key FROM room_memberships as m - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ + sql = """ + SELECT c.state_key FROM current_state_events as c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; + """ txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] @@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We do this all in one transaction to keep the cache small. # FIXME: get rid of this when we have room_stats - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT count(*), membership FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - GROUP BY membership - """ - else: - sql = """ - SELECT count(*), m.membership FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? - GROUP BY m.membership - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT count(*), membership FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + GROUP BY membership + """ txn.execute(sql, (room_id,)) res: Dict[str, MemberSummary] = {} @@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): # we order by membership and then fairly arbitrarily by event_id so # heroes are consistent - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT state_key, membership, event_id - FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - ORDER BY - CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - event_id ASC - LIMIT ? - """ - else: - sql = """ - SELECT c.state_key, m.membership, c.event_id - FROM room_memberships as m - INNER JOIN current_state_events as c USING (room_id, event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? - ORDER BY - CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - c.event_id ASC - LIMIT ? - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT state_key, membership, event_id + FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + ORDER BY + CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + event_id ASC + LIMIT ? + """ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user. txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6)) @@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We use `current_state_events` here and not `local_current_membership` # as a) this gets called with remote users and b) this only gets called # for rooms the server is participating in. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND c.membership = ? - """ - else: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND m.membership = ? - """ + sql = """ + SELECT room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.state_key = ? + AND c.membership = ? + """ txn.execute(sql, (user_id, Membership.JOIN)) return frozenset( @@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_ids, ) - if self._current_state_events_membership_up_to_date: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.membership = ? - AND {clause} - """ - else: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND m.membership = ? - AND {clause} - """ + sql = f""" + SELECT c.state_key, room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.membership = ? + AND {clause} + """ txn.execute(sql, [Membership.JOIN] + args) diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py new file mode 100644 index 0000000000..b5853d125c --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py @@ -0,0 +1,52 @@ +# Copyright 2022 Beeper +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Forces through the `current_state_events_membership` background job so checks +for its completion can be removed. + +Note the background job must still remain defined in the database class. +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + cur.execute("SELECT update_name FROM background_updates") + rows = cur.fetchall() + for row in rows: + if row[0] == "current_state_events_membership": + break + # No pending background job so nothing to do here + else: + return + + # Populate membership field for all current_state_events, this may take + # a while but was originally handled via a background update in 2019. + cur.execute( + """ + UPDATE current_state_events + SET membership = ( + SELECT membership FROM room_memberships + WHERE event_id = current_state_events.event_id + ) + """ + ) + + # Finally, delete the background job because we've handled it above + cur.execute( + """ + DELETE FROM background_updates + WHERE update_name = 'current_state_events_membership' + """ + ) -- cgit 1.5.1 From cdbb6412327b542e0dead792717fe58253291131 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 13 Sep 2022 08:16:37 +0100 Subject: Add receipts event stream ordering (#13703) --- changelog.d/13703.misc | 1 + synapse/_scripts/synapse_port_db.py | 2 + synapse/storage/databases/main/receipts.py | 74 +++++++++++++++++++++- .../delta/72/05receipts_event_stream_ordering.sql | 19 ++++++ 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13703.misc create mode 100644 synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13703.misc b/changelog.d/13703.misc new file mode 100644 index 0000000000..685a29b17d --- /dev/null +++ b/changelog.d/13703.misc @@ -0,0 +1 @@ +Add & populate `event_stream_ordering` column on receipts table for future optimisation of push action processing. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 543bba27c2..30983c47fb 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -67,6 +67,7 @@ from synapse.storage.databases.main.media_repository import ( ) from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore from synapse.storage.databases.main.pusher import PusherWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsBackgroundUpdateStore from synapse.storage.databases.main.registration import ( RegistrationBackgroundUpdateStore, find_max_generated_user_id_localpart, @@ -203,6 +204,7 @@ class Store( PushRuleStore, PusherWorkerStore, PresenceBackgroundUpdateStore, + ReceiptsBackgroundUpdateStore, ): def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 3838409519..719a12b0ae 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -675,6 +675,7 @@ class ReceiptsWorkerStore(SQLBaseStore): values={ "stream_id": stream_id, "event_id": event_id, + "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), }, # receipts_linearized has a unique constraint on @@ -830,5 +831,76 @@ class ReceiptsWorkerStore(SQLBaseStore): ) -class ReceiptsStore(ReceiptsWorkerStore): +class ReceiptsBackgroundUpdateStore(SQLBaseStore): + POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering" + + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_update_handler( + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING, + self._populate_receipt_event_stream_ordering, + ) + + async def _populate_receipt_event_stream_ordering( + self, progress: JsonDict, batch_size: int + ) -> int: + def _populate_receipt_event_stream_ordering_txn( + txn: LoggingTransaction, + ) -> bool: + + if "max_stream_id" in progress: + max_stream_id = progress["max_stream_id"] + else: + txn.execute("SELECT max(stream_id) FROM receipts_linearized") + res = txn.fetchone() + if res is None or res[0] is None: + return True + else: + max_stream_id = res[0] + + start = progress.get("stream_id", 0) + stop = start + batch_size + + sql = """ + UPDATE receipts_linearized + SET event_stream_ordering = ( + SELECT stream_ordering + FROM events + WHERE event_id = receipts_linearized.event_id + ) + WHERE stream_id >= ? AND stream_id < ? + """ + txn.execute(sql, (start, stop)) + + self.db_pool.updates._background_update_progress_txn( + txn, + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING, + { + "stream_id": stop, + "max_stream_id": max_stream_id, + }, + ) + + return stop > max_stream_id + + finished = await self.db_pool.runInteraction( + "_remove_devices_from_device_inbox_txn", + _populate_receipt_event_stream_ordering_txn, + ) + + if finished: + await self.db_pool.updates._end_background_update( + self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING + ) + + return batch_size + + +class ReceiptsStore(ReceiptsWorkerStore, ReceiptsBackgroundUpdateStore): pass diff --git a/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql new file mode 100644 index 0000000000..2a822f4509 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 Beeper + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE receipts_linearized ADD COLUMN event_stream_ordering BIGINT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_event_stream_ordering', '{}'); -- cgit 1.5.1 From b60d47ab2c55580fc1941497964cd33c27838231 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 13 Sep 2022 10:53:11 +0100 Subject: Updates to the schema dump script (#13770) --- changelog.d/13770.misc | 1 + scripts-dev/make_full_schema.sh | 48 ++++++++-------------- .../storage/schema/state/delta/30/state_stream.sql | 4 ++ 3 files changed, 21 insertions(+), 32 deletions(-) create mode 100644 changelog.d/13770.misc (limited to 'synapse/storage/schema') diff --git a/changelog.d/13770.misc b/changelog.d/13770.misc new file mode 100644 index 0000000000..36ac91400a --- /dev/null +++ b/changelog.d/13770.misc @@ -0,0 +1 @@ +Update the script which makes full schema dumps. diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index f0e22d4ca2..61394360ce 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -9,8 +9,10 @@ export PGHOST="localhost" POSTGRES_DB_NAME="synapse_full_schema.$$" -SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite" -POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres" +SQLITE_SCHEMA_FILE="schema.sql.sqlite" +SQLITE_ROWS_FILE="rows.sql.sqlite" +POSTGRES_SCHEMA_FILE="full.sql.postgres" +POSTGRES_ROWS_FILE="rows.sql.postgres" REQUIRED_DEPS=("matrix-synapse" "psycopg2") @@ -22,7 +24,7 @@ usage() { echo " Username to connect to local postgres instance. The password will be requested" echo " during script execution." echo "-c" - echo " CI mode. Enables coverage tracking and prints every command that the script runs." + echo " CI mode. Prints every command that the script runs." echo "-o " echo " Directory to output full schema files to." echo "-h" @@ -37,11 +39,6 @@ while getopts "p:co:h" opt; do c) # Print all commands that are being executed set -x - - # Modify required dependencies for coverage - REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess") - - COVERAGE=1 ;; o) command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1) @@ -102,6 +99,7 @@ SQLITE_DB=$TMPDIR/homeserver.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit +# TODO: the trap should also drop the temp postgres DB trap 'rm -rf $TMPDIR' EXIT cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..." +sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE" +sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE" -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..." -pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..." +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE" echo "Cleaning up temporary Postgres database..." dropdb $POSTGRES_DB_NAME diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql index e85699e82e..bdaf8b02d5 100644 --- a/synapse/storage/schema/state/delta/30/state_stream.sql +++ b/synapse/storage/schema/state/delta/30/state_stream.sql @@ -26,6 +26,10 @@ * (event, state) pair, we can use that stream_ordering to identify when * the new state was assigned for the event. */ + +/* NB: This table belongs to the `main` logical database; it should not be present + * in `state`. + */ CREATE TABLE IF NOT EXISTS ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, -- cgit 1.5.1 From 12dacecabd27680dc77c17724953ecda0801b5ea Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 13 Sep 2022 16:14:28 +0200 Subject: Make sequence `cache_invalidation_stream_seq` begin at `2` (#13766) Signed-off-by: Mathieu Velten Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/13766.bugfix | 1 + synapse/storage/schema/__init__.py | 1 + ...8begin_cache_invalidation_seq_at_2.sql.postgres | 23 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 changelog.d/13766.bugfix create mode 100644 synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres (limited to 'synapse/storage/schema') diff --git a/changelog.d/13766.bugfix b/changelog.d/13766.bugfix new file mode 100644 index 0000000000..c708e54f9c --- /dev/null +++ b/changelog.d/13766.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the `cache_invalidation_stream_seq` sequence would begin at 1 instead of 2. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 256f745dc0..32cda5e3ba 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -76,6 +76,7 @@ Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. - Tables related to groups are dropped. - Unused column application_services_state.last_txn is dropped + - Cache invalidation stream id sequence now begins at 2 to match code expectation. """ diff --git a/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres new file mode 100644 index 0000000000..69931fe971 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- The sequence needs to begin at 2 because a bunch of code assumes that +-- get_next_id_txn will return values >= 2, cf this comment: +-- https://github.com/matrix-org/synapse/blob/b93bd95e8ab64d27ae26841020f62ee61272a5f2/synapse/storage/util/id_generators.py#L344 + +SELECT setval('cache_invalidation_stream_seq', ( + SELECT COALESCE(MAX(last_value), 1) FROM cache_invalidation_stream_seq +)); -- cgit 1.5.1 From 51a77e990b7a59e460ab22a2788ab8c3506b9a2c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 14 Sep 2022 14:16:12 +0100 Subject: Remove incorrect migration file from `state` logical DB (#13788) * Remove incorrect migration file from `state` logical DB The table `ex_outlier_stream` is part of the `main` logical DB; it should not have been created in the `state` logical DB. We remove this migration now as a tidy-up. Note: we cannot `DROP TABLE IF EXISTS ex_outlier_stream` in a new migration, because some (most) instances of Synapse host both of these logical DBs on the same DB cluster. * Changelog --- changelog.d/13788.misc | 1 + .../storage/schema/state/delta/30/state_stream.sql | 37 ---------------------- 2 files changed, 1 insertion(+), 37 deletions(-) create mode 100644 changelog.d/13788.misc delete mode 100644 synapse/storage/schema/state/delta/30/state_stream.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13788.misc b/changelog.d/13788.misc new file mode 100644 index 0000000000..7263b1ac52 --- /dev/null +++ b/changelog.d/13788.misc @@ -0,0 +1 @@ +Remove an old, incorrect migration file. diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql deleted file mode 100644 index bdaf8b02d5..0000000000 --- a/synapse/storage/schema/state/delta/30/state_stream.sql +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/* We used to create a table called current_state_resets, but this is no - * longer used and is removed in delta 54. - */ - -/* The outlier events that have aquired a state group typically through - * backfill. This is tracked separately to the events table, as assigning a - * state group change the position of the existing event in the stream - * ordering. - * However since a stream_ordering is assigned in persist_event for the - * (event, state) pair, we can use that stream_ordering to identify when - * the new state was assigned for the event. - */ - -/* NB: This table belongs to the `main` logical database; it should not be present - * in `state`. - */ -CREATE TABLE IF NOT EXISTS ex_outlier_stream( - event_stream_ordering BIGINT PRIMARY KEY NOT NULL, - event_id TEXT NOT NULL, - state_group BIGINT NOT NULL -); -- cgit 1.5.1 From f2d12ccabef17faa0bf6b34fbb6d944849afc4d4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 14 Sep 2022 12:01:42 -0400 Subject: Use partial indices on SQLIte. (#13802) Partial indices have been supported since SQLite 3.8, but Synapse now requires >= 3.27, so we can enable support for them. This requires rebuilding previous indices which were partial on PostgreSQL, but not on SQLite. --- changelog.d/13802.misc | 1 + synapse/storage/background_updates.py | 6 +-- .../storage/databases/main/event_push_actions.py | 1 - .../main/delta/72/09partial_indices.sql.sqlite | 56 ++++++++++++++++++++++ 4 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13802.misc create mode 100644 synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/13802.misc b/changelog.d/13802.misc new file mode 100644 index 0000000000..0d55071326 --- /dev/null +++ b/changelog.d/13802.misc @@ -0,0 +1 @@ +Use partial indices on SQLite. diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 555b4e77d2..cf1eabc437 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -581,9 +581,6 @@ class BackgroundUpdater: def create_index_sqlite(conn: Connection) -> None: # Sqlite doesn't support concurrent creation of indexes. # - # We don't use partial indices on SQLite as it wasn't introduced - # until 3.8, and wheezy and CentOS 7 have 3.7 - # # We assume that sqlite doesn't give us invalid indices; however # we may still end up with the index existing but the # background_updates not having been recorded if synapse got shut @@ -591,12 +588,13 @@ class BackgroundUpdater: # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.) sql = ( "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s" - " (%(columns)s)" + " (%(columns)s) %(where_clause)s" ) % { "unique": "UNIQUE" if unique else "", "name": index_name, "table": table, "columns": ", ".join(columns), + "where_clause": "WHERE " + where_clause if where_clause else "", } c = conn.cursor() diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f4a07de2a3..3a3fb8c507 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1255,7 +1255,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): table="event_push_actions", columns=["highlight", "stream_ordering"], where_clause="highlight=0", - psql_only=True, ) async def get_push_actions_for_user( diff --git a/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite new file mode 100644 index 0000000000..c8dfdf0218 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite @@ -0,0 +1,56 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- SQLite needs to rebuild indices which use partial indices on Postgres, but +-- previously did not use them on SQLite. + +-- Drop each index that was added with register_background_index_update AND specified +-- a where_clause (that existed before this delta). + +-- From events_bg_updates.py +DROP INDEX IF EXISTS event_contains_url_index; +-- There is also a redactions_censored_redacts index, but that gets dropped. +DROP INDEX IF EXISTS redactions_have_censored_ts; +-- There is also a PostgreSQL only index (event_contains_url_index2) +-- which gets renamed to event_contains_url_index. + +-- From roommember.py +DROP INDEX IF EXISTS room_memberships_user_room_forgotten; + +-- From presence.py +DROP INDEX IF EXISTS presence_stream_state_not_offline_idx; + +-- From media_repository.py +DROP INDEX IF EXISTS local_media_repository_url_idx; + +-- From event_push_actions.py +DROP INDEX IF EXISTS event_push_actions_highlights_index; +-- There's also a event_push_actions_stream_highlight_index which was previously +-- PostgreSQL-only. + +-- From state.py +DROP INDEX IF EXISTS current_state_events_member_index; + +-- Re-insert the background jobs to re-create the indices. +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7209, 'event_contains_url_index', '{}', NULL), + (7209, 'redactions_have_censored_ts_idx', '{}', NULL), + (7209, 'room_membership_forgotten_idx', '{}', NULL), + (7209, 'presence_stream_not_offline_index', '{}', NULL), + (7209, 'local_media_repository_url_idx', '{}', NULL), + (7209, 'event_push_actions_highlights_index', '{}', NULL), + (7209, 'event_push_actions_stream_highlight_index', '{}', NULL), + (7209, 'current_state_members_idx', '{}', NULL) +ON CONFLICT (update_name) DO NOTHING; -- cgit 1.5.1 From 666ae877292d4747b9441105e3df8558f7a335c0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 14 Sep 2022 13:11:16 -0400 Subject: Update event push action and receipt tables to support threads. (#13753) Adds a `thread_id` column to the `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` tables. This will notifications to be segmented by the thread in a future pull request. The `thread_id` column stores the root event ID or the special value `"main"`. The `thread_id` column for `event_push_actions` and `event_push_summary` is backfilled with `"main"` for all existing rows. New entries into `event_push_actions` and `event_push_actions_staging` will get the proper thread ID. `receipts_linearized` and `receipts_graph` also gain a `thread_id` column, which is similar, except `NULL` is a special value meaning the receipt is "unthreaded". See MSC3771 and MSC3773 for where this data will be useful. --- changelog.d/13753.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 29 ++--- .../storage/databases/main/event_push_actions.py | 121 ++++++++++++++++++++- synapse/storage/databases/main/events.py | 4 +- synapse/storage/databases/main/receipts.py | 20 ++++ synapse/storage/schema/__init__.py | 6 +- .../main/delta/72/06thread_notifications.sql | 30 +++++ .../main/delta/72/07thread_receipts.sql.postgres | 30 +++++ .../main/delta/72/07thread_receipts.sql.sqlite | 70 ++++++++++++ .../schema/main/delta/72/08thread_receipts.sql | 20 ++++ tests/replication/slave/storage/test_events.py | 1 + 11 files changed, 312 insertions(+), 20 deletions(-) create mode 100644 changelog.d/13753.misc create mode 100644 synapse/storage/schema/main/delta/72/06thread_notifications.sql create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite create mode 100644 synapse/storage/schema/main/delta/72/08thread_receipts.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13753.misc b/changelog.d/13753.misc new file mode 100644 index 0000000000..63de2eb9f9 --- /dev/null +++ b/changelog.d/13753.misc @@ -0,0 +1 @@ +Prepatory work for storing thread IDs for notifications and receipts. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d1caf8a0f7..3846fbc5f0 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -198,7 +198,7 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level async def _get_mutual_relations( - self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]] + self, parent_id: str, rules: Iterable[Tuple[PushRule, bool]] ) -> Dict[str, Set[Tuple[str, str]]]: """ Fetch event metadata for events which related to the same event as the given event. @@ -206,7 +206,7 @@ class BulkPushRuleEvaluator: If the given event has no relation information, returns an empty dictionary. Args: - event_id: The event ID which is targeted by relations. + parent_id: The event ID which is targeted by relations. rules: The push rules which will be processed for this event. Returns: @@ -220,12 +220,6 @@ class BulkPushRuleEvaluator: if not self._relations_match_enabled: return {} - # If the event does not have a relation, then cannot have any mutual - # relations. - relation = relation_from_event(event) - if not relation: - return {} - # Pre-filter to figure out which relation types are interesting. rel_types = set() for rule, enabled in rules: @@ -246,9 +240,7 @@ class BulkPushRuleEvaluator: return {} # If any valid rules were found, fetch the mutual relations. - return await self.store.get_mutual_event_relations( - relation.parent_id, rel_types - ) + return await self.store.get_mutual_event_relations(parent_id, rel_types) @measure_func("action_for_event_by_user") async def action_for_event_by_user( @@ -281,9 +273,17 @@ class BulkPushRuleEvaluator: sender_power_level, ) = await self._get_power_levels_and_sender_level(event, context) - relations = await self._get_mutual_relations( - event, itertools.chain(*rules_by_user.values()) - ) + relation = relation_from_event(event) + # If the event does not have a relation, then cannot have any mutual + # relations or thread ID. + relations = {} + thread_id = "main" + if relation: + relations = await self._get_mutual_relations( + relation.parent_id, itertools.chain(*rules_by_user.values()) + ) + if relation.rel_type == RelationTypes.THREAD: + thread_id = relation.parent_id evaluator = PushRuleEvaluatorForEvent( event, @@ -352,6 +352,7 @@ class BulkPushRuleEvaluator: event.event_id, actions_by_user, count_as_unread, + thread_id, ) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3a3fb8c507..6b8668d2dc 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -98,6 +98,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -232,6 +233,104 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas replaces_index="event_push_summary_user_rm", ) + self.db_pool.updates.register_background_index_update( + "event_push_summary_unique_index2", + index_name="event_push_summary_unique_index2", + table="event_push_summary", + columns=["user_id", "room_id", "thread_id"], + unique=True, + ) + + self.db_pool.updates.register_background_update_handler( + "event_push_backfill_thread_id", + self._background_backfill_thread_id, + ) + + async def _background_backfill_thread_id( + self, progress: JsonDict, batch_size: int + ) -> int: + """ + Fill in the thread_id field for event_push_actions and event_push_summary. + + This is preparatory so that it can be made non-nullable in the future. + + Because all current (null) data is done in an unthreaded manner this + simply assumes it is on the "main" timeline. Since event_push_actions + are periodically cleared it is not possible to correctly re-calculate + the thread_id. + """ + event_push_actions_done = progress.get("event_push_actions_done", False) + + def add_thread_id_txn( + txn: LoggingTransaction, table_name: str, start_stream_ordering: int + ) -> int: + sql = f""" + SELECT stream_ordering + FROM {table_name} + WHERE + thread_id IS NULL + AND stream_ordering > ? + ORDER BY stream_ordering + LIMIT ? + """ + txn.execute(sql, (start_stream_ordering, batch_size)) + + # No more rows to process. + rows = txn.fetchall() + if not rows: + progress[f"{table_name}_done"] = True + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + return 0 + + # Update the thread ID for any of those rows. + max_stream_ordering = rows[-1][0] + + sql = f""" + UPDATE {table_name} + SET thread_id = 'main' + WHERE stream_ordering <= ? AND thread_id IS NULL + """ + txn.execute(sql, (max_stream_ordering,)) + + # Update progress. + processed_rows = txn.rowcount + progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + + return processed_rows + + # First update the event_push_actions table, then the event_push_summary table. + # + # Note that the event_push_actions_staging table is ignored since it is + # assumed that items in that table will only exist for a short period of + # time. + if not event_push_actions_done: + result = await self.db_pool.runInteraction( + "event_push_backfill_thread_id", + add_thread_id_txn, + "event_push_actions", + progress.get("max_event_push_actions_stream_ordering", 0), + ) + else: + result = await self.db_pool.runInteraction( + "event_push_backfill_thread_id", + add_thread_id_txn, + "event_push_summary", + progress.get("max_event_push_summary_stream_ordering", 0), + ) + + # Only done after the event_push_summary table is done. + if not result: + await self.db_pool.updates._end_background_update( + "event_push_backfill_thread_id" + ) + + return result + @cached(tree=True, max_entries=5000) async def get_unread_event_push_actions_by_room_for_user( self, @@ -670,6 +769,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas event_id: str, user_id_actions: Dict[str, Collection[Union[Mapping, str]]], count_as_unread: bool, + thread_id: str, ) -> None: """Add the push actions for the event to the push action staging area. @@ -678,6 +778,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id_actions: A mapping of user_id to list of push actions, where an action can either be a string or dict. count_as_unread: Whether this event should increment unread counts. + thread_id: The thread this event is parent of, if applicable. """ if not user_id_actions: return @@ -686,7 +787,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( user_id: str, actions: Collection[Union[Mapping, str]] - ) -> Tuple[str, str, str, int, int, int]: + ) -> Tuple[str, str, str, int, int, int, str]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 return ( @@ -696,11 +797,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas notif, # notif column is_highlight, # highlight column int(count_as_unread), # unread column + thread_id, # thread_id column ) await self.db_pool.simple_insert_many( "event_push_actions_staging", - keys=("event_id", "user_id", "actions", "notif", "highlight", "unread"), + keys=( + "event_id", + "user_id", + "actions", + "notif", + "highlight", + "unread", + "thread_id", + ), values=[ _gen_entry(user_id, actions) for user_id, actions in user_id_actions.items() @@ -981,6 +1091,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) # Replace the previous summary with the new counts. + # + # TODO(threads): Upsert per-thread instead of setting them all to main. self.db_pool.simple_upsert_txn( txn, table="event_push_summary", @@ -990,6 +1102,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas "unread_count": unread_count, "stream_ordering": old_rotate_stream_ordering, "last_receipt_stream_ordering": stream_ordering, + "thread_id": "main", }, ) @@ -1138,17 +1251,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications, handling %d rows", len(summaries)) + # TODO(threads): Update on a per-thread basis. self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", key_names=("user_id", "room_id"), key_values=[(user_id, room_id) for user_id, room_id in summaries], - value_names=("notif_count", "unread_count", "stream_ordering"), + value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"), value_values=[ ( summary.notif_count, summary.unread_count, summary.stream_ordering, + "main", ) for summary in summaries.values() ], diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a4010ee28d..c0b4080e4b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2192,9 +2192,9 @@ class PersistEventsStore: sql = """ INSERT INTO event_push_actions ( room_id, event_id, user_id, actions, stream_ordering, - topological_ordering, notif, highlight, unread + topological_ordering, notif, highlight, unread, thread_id ) - SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread, thread_id FROM event_push_actions_staging WHERE event_id = ? """ diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 719a12b0ae..ddb8e80b69 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -113,6 +113,24 @@ class ReceiptsWorkerStore(SQLBaseStore): prefilled_cache=receipts_stream_prefill, ) + self.db_pool.updates.register_background_index_update( + "receipts_linearized_unique_index", + index_name="receipts_linearized_unique_index", + table="receipts_linearized", + columns=["room_id", "receipt_type", "user_id"], + where_clause="thread_id IS NULL", + unique=True, + ) + + self.db_pool.updates.register_background_index_update( + "receipts_graph_unique_index", + index_name="receipts_graph_unique_index", + table="receipts_graph", + columns=["room_id", "receipt_type", "user_id"], + where_clause="thread_id IS NULL", + unique=True, + ) + def get_max_receipt_stream_id(self) -> int: """Get the current max stream ID for receipts stream""" return self._receipts_id_gen.get_current_token() @@ -677,6 +695,7 @@ class ReceiptsWorkerStore(SQLBaseStore): "event_id": event_id, "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), + "thread_id": None, }, # receipts_linearized has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock @@ -824,6 +843,7 @@ class ReceiptsWorkerStore(SQLBaseStore): values={ "event_ids": json_encoder.encode(event_ids), "data": json_encoder.encode(data), + "thread_id": None, }, # receipts_graph has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 32cda5e3ba..38c9532bfd 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 72 # remember to update the list below when updating +SCHEMA_VERSION = 73 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -77,6 +77,10 @@ Changes in SCHEMA_VERSION = 72: - Tables related to groups are dropped. - Unused column application_services_state.last_txn is dropped - Cache invalidation stream id sequence now begins at 2 to match code expectation. + +Changes in SCHEMA_VERSION = 73; + - thread_id column is added to event_push_actions, event_push_actions_staging + event_push_summary, receipts_linearized, and receipts_graph. """ diff --git a/synapse/storage/schema/main/delta/72/06thread_notifications.sql b/synapse/storage/schema/main/delta/72/06thread_notifications.sql new file mode 100644 index 0000000000..2f4f5dac7a --- /dev/null +++ b/synapse/storage/schema/main/delta/72/06thread_notifications.sql @@ -0,0 +1,30 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a nullable column for thread ID to the event push actions tables; this +-- will be filled in with a default value for any previously existing rows. +-- +-- After migration this can be made non-nullable. + +ALTER TABLE event_push_actions_staging ADD COLUMN thread_id TEXT; +ALTER TABLE event_push_actions ADD COLUMN thread_id TEXT; +ALTER TABLE event_push_summary ADD COLUMN thread_id TEXT; + +-- Update the unique index for `event_push_summary`. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7006, 'event_push_summary_unique_index2', '{}'); + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7006, 'event_push_backfill_thread_id', '{}', 'event_push_summary_unique_index2'); diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres new file mode 100644 index 0000000000..55fff9e278 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres @@ -0,0 +1,30 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a nullable column for thread ID to the receipts table; this allows a +-- receipt per user, per room, as well as an unthreaded receipt (corresponding +-- to a null thread ID). + +ALTER TABLE receipts_linearized ADD COLUMN thread_id TEXT; +ALTER TABLE receipts_graph ADD COLUMN thread_id TEXT; + +-- Rebuild the unique constraint with the thread_id. +ALTER TABLE receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness_thread + UNIQUE (room_id, receipt_type, user_id, thread_id); + +ALTER TABLE receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness_thread + UNIQUE (room_id, receipt_type, user_id, thread_id); diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite new file mode 100644 index 0000000000..232f67deb4 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite @@ -0,0 +1,70 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Allow multiple receipts per user per room via a nullable thread_id column. +-- +-- SQLite doesn't support modifying constraints to an existing table, so it must +-- be recreated. + +-- Create the new tables. +CREATE TABLE receipts_linearized_new ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + thread_id TEXT, + event_stream_ordering BIGINT, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +CREATE TABLE receipts_graph_new ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_ids TEXT NOT NULL, + thread_id TEXT, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +-- Drop the old indexes. +DROP INDEX IF EXISTS receipts_linearized_id; +DROP INDEX IF EXISTS receipts_linearized_room_stream; +DROP INDEX IF EXISTS receipts_linearized_user; + +-- Copy the data. +INSERT INTO receipts_linearized_new (stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data) + SELECT stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data + FROM receipts_linearized; +INSERT INTO receipts_graph_new (room_id, receipt_type, user_id, event_ids, data) + SELECT room_id, receipt_type, user_id, event_ids, data + FROM receipts_graph; + +-- Drop the old tables. +DROP TABLE receipts_linearized; +DROP TABLE receipts_graph; + +-- Rename the tables. +ALTER TABLE receipts_linearized_new RENAME TO receipts_linearized; +ALTER TABLE receipts_graph_new RENAME TO receipts_graph; + +-- Create the indices. +CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); diff --git a/synapse/storage/schema/main/delta/72/08thread_receipts.sql b/synapse/storage/schema/main/delta/72/08thread_receipts.sql new file mode 100644 index 0000000000..e35b021f31 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/08thread_receipts.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7007, 'receipts_linearized_unique_index', '{}'); + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7007, 'receipts_graph_unique_index', '{}'); diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 531a0db2d0..49a21e2e85 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -404,6 +404,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): event.event_id, {user_id: actions for user_id, actions in push_actions}, False, + "main", ) ) return event, context -- cgit 1.5.1 From 957e3d74fc70f92bb9ed3c709f87752bf77a8c90 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 13:57:50 -0500 Subject: Keep track when we try and fail to process a pulled event (#13589) We can follow-up this PR with: 1. Only try to backfill from an event if we haven't tried recently -> https://github.com/matrix-org/synapse/issues/13622 1. When we decide to backfill that event again, process it in the background so it doesn't block and make `/messages` slow when we know it will probably fail again -> https://github.com/matrix-org/synapse/issues/13623 1. Generally track failures everywhere we try and fail to pull an event over federation -> https://github.com/matrix-org/synapse/issues/13700 Fix https://github.com/matrix-org/synapse/issues/13621 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.qv7cj51sv9i5) --- changelog.d/13589.feature | 1 + synapse/handlers/federation_event.py | 7 + synapse/storage/databases/main/event_federation.py | 45 +++++ synapse/storage/databases/main/events.py | 32 ++- synapse/storage/schema/__init__.py | 2 + .../main/delta/73/01event_failed_pull_attempts.sql | 29 +++ tests/handlers/test_federation_event.py | 222 +++++++++++++++++++++ 7 files changed, 329 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13589.feature create mode 100644 synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature new file mode 100644 index 0000000000..78fa1ddb52 --- /dev/null +++ b/changelog.d/13589.feature @@ -0,0 +1 @@ +Keep track when we attempt to backfill an event but fail so we can intelligently back-off in the future. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index ace7adcffb..9e065e1116 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -862,6 +862,9 @@ class FederationEventHandler: self._sanity_check_event(event) except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(err) + ) return try: @@ -897,6 +900,10 @@ class FederationEventHandler: backfilled=backfilled, ) except FederationError as e: + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(e) + ) + if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) else: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ca47a22bf1..ef477978ed 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1294,6 +1294,51 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return event_id_results + @trace + async def record_event_failed_pull_attempt( + self, room_id: str, event_id: str, cause: str + ) -> None: + """ + Record when we fail to pull an event over federation. + + This information allows us to be more intelligent when we decide to + retry (we don't need to fail over and over) and we can process that + event in the background so we don't block on it each time. + + Args: + room_id: The room where the event failed to pull from + event_id: The event that failed to be fetched or processed + cause: The error message or reason that we failed to pull the event + """ + await self.db_pool.runInteraction( + "record_event_failed_pull_attempt", + self._record_event_failed_pull_attempt_upsert_txn, + room_id, + event_id, + cause, + db_autocommit=True, # Safe as it's a single upsert + ) + + def _record_event_failed_pull_attempt_upsert_txn( + self, + txn: LoggingTransaction, + room_id: str, + event_id: str, + cause: str, + ) -> None: + sql = """ + INSERT INTO event_failed_pull_attempts ( + room_id, event_id, num_attempts, last_attempt_ts, last_cause + ) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (room_id, event_id) DO UPDATE SET + num_attempts=event_failed_pull_attempts.num_attempts + 1, + last_attempt_ts=EXCLUDED.last_attempt_ts, + last_cause=EXCLUDED.last_cause; + """ + + txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause)) + async def get_missing_events( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c0b4080e4b..1b54a2eb57 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2435,17 +2435,31 @@ class PersistEventsStore: "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" ) + backward_extremity_tuples_to_remove = [ + (ev.event_id, ev.room_id) + for ev in events + if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 + ] txn.execute_batch( query, - [ - (ev.event_id, ev.room_id) - for ev in events - if not ev.internal_metadata.is_outlier() - # If we encountered an event with no prev_events, then we might - # as well remove it now because it won't ever have anything else - # to backfill from. - or len(ev.prev_event_ids()) == 0 - ], + backward_extremity_tuples_to_remove, + ) + + # Clear out the failed backfill attempts after we successfully pulled + # the event. Since we no longer need these events as backward + # extremities, it also means that they won't be backfilled from again so + # we no longer need to store the backfill attempts around it. + query = """ + DELETE FROM event_failed_pull_attempts + WHERE event_id = ? and room_id = ? + """ + txn.execute_batch( + query, + backward_extremity_tuples_to_remove, ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 38c9532bfd..68e055c664 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -81,6 +81,8 @@ Changes in SCHEMA_VERSION = 72: Changes in SCHEMA_VERSION = 73; - thread_id column is added to event_push_actions, event_push_actions_staging event_push_summary, receipts_linearized, and receipts_graph. + - Add table `event_failed_pull_attempts` to keep track when we fail to pull + events over federation. """ diff --git a/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql new file mode 100644 index 0000000000..d397ee1082 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql @@ -0,0 +1,29 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Add a table that keeps track of when we failed to pull an event over +-- federation (via /backfill, `/event`, `/get_missing_events`, etc). This allows +-- us to be more intelligent when we decide to retry (we don't need to fail over +-- and over) and we can process that event in the background so we don't block +-- on it each time. +CREATE TABLE IF NOT EXISTS event_failed_pull_attempts( + room_id TEXT NOT NULL REFERENCES rooms (room_id), + event_id TEXT NOT NULL, + num_attempts INT NOT NULL, + last_attempt_ts BIGINT NOT NULL, + last_cause TEXT NOT NULL, + PRIMARY KEY (room_id, event_id) +); diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 51c8dd6498..b5b89405a4 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -227,3 +227,225 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): if prev_exists_as_outlier: self.mock_federation_transport_client.get_event.assert_not_called() + + def test_process_pulled_event_records_failed_backfill_attempts( + self, + ) -> None: + """ + Test to make sure that failed backfill attempts for an event are + recorded in the `event_failed_pull_attempts` table. + + In this test, we pretend we are processing a "pulled" event via + backfill. The pulled event has a fake `prev_event` which our server has + obviously never seen before so it attempts to request the state at that + `prev_event` which expectedly fails because it's a fake event. Because + the server can't fetch the state at the missing `prev_event`, the + "pulled" event fails the history check and is fails to process. + + We check that we correctly record the number of failed pull attempts + of the pulled event and as a sanity check, that the "pulled" event isn't + persisted. + """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # We expect an outbound request to /state_ids, so stub that out + self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable( + { + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. + "pdu_ids": [], + "auth_chain_ids": [], + } + ) + # We also expect an outbound request to /state + self.mock_federation_transport_client.get_room_state.return_value = make_awaitable( + StateRequestResponse( + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. + auth_events=[], + state=[], + ) + ) + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [ + # The fake prev event will make the pulled event fail + # the history check (`Unable to get missing prev_event + # $fake_prev_event`) + "$fake_prev_event" + ], + "auth_events": [], + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our failed pull attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event again + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our second failed pull attempt was recorded (`num_attempts` was incremented) + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 2) + + # And as a sanity check, make sure the event was not persisted through all of this. + persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNone( + persisted, + "pulled event that fails the history check should not be persisted at all", + ) + + def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted( + self, + ) -> None: + """ + Test to make sure that failed pull attempts + (`event_failed_pull_attempts` table) for an event are cleared after the + event is successfully persisted. + + In this test, we pretend we are processing a "pulled" event via + backfill. The pulled event succesfully processes and the backward + extremeties are updated along with clearing out any failed pull attempts + for those old extremities. + + We check that we correctly cleared failed pull attempts of the + pulled event. + """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # allow the remote user to send state events + self.helper.send_state( + room_id, + "m.room.power_levels", + {"events_default": 0, "state_default": 0}, + tok=tok, + ) + + # add the remote user to the room + member_event = self.get_success( + event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join") + ) + + initial_state_map = self.get_success( + main_store.get_partial_current_state_ids(room_id) + ) + + auth_event_ids = [ + initial_state_map[("m.room.create", "")], + initial_state_map[("m.room.power_levels", "")], + member_event.event_id, + ] + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [member_event.event_id], + "auth_events": auth_event_ids, + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # Fake the "pulled" event failing to backfill once so we can test + # if it's cleared out later on. + self.get_success( + main_store.record_event_failed_pull_attempt( + pulled_event.room_id, pulled_event.event_id, "fake cause" + ) + ) + # Make sure we have a failed pull attempt recorded for the pulled event + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure the failed pull attempts for the pulled event are cleared + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + allow_none=True, + ) + ) + self.assertIsNone(backfill_num_attempts) + + # And as a sanity check, make sure the "pulled" event was persisted. + persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNotNone(persisted, "pulled event was not persisted at all") -- cgit 1.5.1 From 44be42338e032a50e5fc3d6c69be4055f33cb26c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 16 Sep 2022 10:56:56 -0500 Subject: Add support to purge rows from MSC2716 and other tables when purging a room (#13825) `event_failed_pull_attempts` added in https://github.com/matrix-org/synapse/pull/13589 MSC2716 related tables added in: - https://github.com/matrix-org/synapse/pull/10245/files#diff-3d42dfb44d02f7de3aada105e0bdc1cc9dd7f953cbf0f36c5d0f50827bf0320aR1 - Renamed in https://github.com/matrix-org/synapse/pull/10838/files#diff-2730bfbe9e688b55e46f9371aefe67dac2bd2b2b7d9d6b92774eea1fcfae156dR1 - https://github.com/matrix-org/synapse/pull/10498/files#diff-c52bbfbb5921a3f6f023b24343668479d966fac164f13b7c39d2197ce3afa7a5R1 --- changelog.d/13825.bugfix | 1 + synapse/storage/databases/main/purge_events.py | 5 +++++ synapse/storage/schema/__init__.py | 2 ++ .../delta/73/02room_id_indexes_for_purging.sql | 22 ++++++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 changelog.d/13825.bugfix create mode 100644 synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13825.bugfix b/changelog.d/13825.bugfix new file mode 100644 index 0000000000..626fc6349f --- /dev/null +++ b/changelog.d/13825.bugfix @@ -0,0 +1 @@ +Delete associated data from `event_failed_pull_attempts`, `insertion_events`, `insertion_event_extremities`, `insertion_event_extremities`, `insertion_event_extremities` when purging the room. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index f6822707e4..9213ce0b5a 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -419,6 +419,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_forward_extremities", "event_push_actions", "event_search", + "event_failed_pull_attempts", "partial_state_events", "events", "federation_inbound_events_staging", @@ -441,6 +442,10 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "e2e_room_keys", "event_push_summary", "pusher_throttle", + "insertion_events", + "insertion_event_extremities", + "insertion_event_edges", + "batch_events", "room_account_data", "room_tags", # "rooms" happens last, to keep the foreign keys in the other tables diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 68e055c664..f29424d17a 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -83,6 +83,8 @@ Changes in SCHEMA_VERSION = 73; event_push_summary, receipts_linearized, and receipts_graph. - Add table `event_failed_pull_attempts` to keep track when we fail to pull events over federation. + - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`, + `batch_events`) to make it easy to delete all associated rows when purging a room. """ diff --git a/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql new file mode 100644 index 0000000000..6d38bdd430 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql @@ -0,0 +1,22 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add index so we can easily purge all rows from a given `room_id` +CREATE INDEX IF NOT EXISTS event_failed_pull_attempts_room_id ON event_failed_pull_attempts(room_id); + +-- MSC2716 related tables: +-- Add indexes so we can easily purge all rows from a given `room_id` +CREATE INDEX IF NOT EXISTS insertion_events_room_id ON insertion_events(room_id); +CREATE INDEX IF NOT EXISTS batch_events_room_id ON batch_events(room_id); -- cgit 1.5.1 From 8ae42ab8fa3c6b52d74c24daa7ca75a478fa4fbb Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 21 Sep 2022 15:39:01 +0100 Subject: Support enabling/disabling pushers (from MSC3881) (#13799) Partial implementation of MSC3881 --- changelog.d/13799.feature | 1 + synapse/_scripts/synapse_port_db.py | 1 + synapse/config/experimental.py | 3 + synapse/handlers/register.py | 4 +- synapse/push/__init__.py | 2 + synapse/push/pusherpool.py | 81 ++++++++--- synapse/replication/tcp/client.py | 10 +- synapse/rest/admin/users.py | 4 +- synapse/rest/client/pusher.py | 18 ++- synapse/storage/databases/main/pusher.py | 69 ++++++---- .../schema/main/delta/73/02add_pusher_enabled.sql | 16 +++ tests/push/test_email.py | 4 +- tests/push/test_http.py | 148 +++++++++++++++++++-- tests/replication/test_pusher_shard.py | 2 +- tests/rest/admin/test_user.py | 2 +- 15 files changed, 294 insertions(+), 71 deletions(-) create mode 100644 changelog.d/13799.feature create mode 100644 synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13799.feature b/changelog.d/13799.feature new file mode 100644 index 0000000000..6c8e5cffe2 --- /dev/null +++ b/changelog.d/13799.feature @@ -0,0 +1 @@ +Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881). diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 30983c47fb..450ba462ba 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -111,6 +111,7 @@ BOOLEAN_COLUMNS = { "e2e_fallback_keys_json": ["used"], "access_tokens": ["used"], "device_lists_changes_in_room": ["converted_to_destinations"], + "pushers": ["enabled"], } diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 702b81e636..f4541a8db0 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -93,3 +93,6 @@ class ExperimentalConfig(Config): # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) + + # MSC3881: Remotely toggle push notifications for another client + self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 20ec22105a..cfcadb34db 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -997,7 +997,7 @@ class RegistrationHandler: assert user_tuple token_id = user_tuple.token_id - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user_id, access_token=token_id, kind="email", @@ -1005,7 +1005,7 @@ class RegistrationHandler: app_display_name="Email Notifications", device_display_name=threepid["address"], pushkey=threepid["address"], - lang=None, # We don't know a user's language here + lang=None, data={}, ) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 57c4d70466..ac99d35a7e 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -116,6 +116,7 @@ class PusherConfig: last_stream_ordering: int last_success: Optional[int] failing_since: Optional[int] + enabled: bool def as_dict(self) -> Dict[str, Any]: """Information that can be retrieved about a pusher after creation.""" @@ -128,6 +129,7 @@ class PusherConfig: "lang": self.lang, "profile_tag": self.profile_tag, "pushkey": self.pushkey, + "enabled": self.enabled, } diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 1e0ef44fc7..2597898cf4 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -94,7 +94,7 @@ class PusherPool: return run_as_background_process("start_pushers", self._start_pushers) - async def add_pusher( + async def add_or_update_pusher( self, user_id: str, access_token: Optional[int], @@ -106,6 +106,7 @@ class PusherPool: lang: Optional[str], data: JsonDict, profile_tag: str = "", + enabled: bool = True, ) -> Optional[Pusher]: """Creates a new pusher and adds it to the pool @@ -147,9 +148,20 @@ class PusherPool: last_stream_ordering=last_stream_ordering, last_success=None, failing_since=None, + enabled=enabled, ) ) + # Before we actually persist the pusher, we check if the user already has one + # for this app ID and pushkey. If so, we want to keep the access token in place, + # since this could be one device modifying (e.g. enabling/disabling) another + # device's pusher. + existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( + user_id, app_id, pushkey + ) + if existing_config: + access_token = existing_config.access_token + await self.store.add_pusher( user_id=user_id, access_token=access_token, @@ -163,8 +175,9 @@ class PusherPool: data=data, last_stream_ordering=last_stream_ordering, profile_tag=profile_tag, + enabled=enabled, ) - pusher = await self.start_pusher_by_id(app_id, pushkey, user_id) + pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id) return pusher @@ -276,10 +289,25 @@ class PusherPool: except Exception: logger.exception("Exception in pusher on_new_receipts") - async def start_pusher_by_id( + async def _get_pusher_config_for_user_by_app_id_and_pushkey( + self, user_id: str, app_id: str, pushkey: str + ) -> Optional[PusherConfig]: + resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) + + pusher_config = None + for r in resultlist: + if r.user_name == user_id: + pusher_config = r + + return pusher_config + + async def process_pusher_change_by_id( self, app_id: str, pushkey: str, user_id: str ) -> Optional[Pusher]: - """Look up the details for the given pusher, and start it + """Look up the details for the given pusher, and either start it if its + "enabled" flag is True, or try to stop it otherwise. + + If the pusher is new and its "enabled" flag is False, the stop is a noop. Returns: The pusher started, if any @@ -290,12 +318,13 @@ class PusherPool: if not self._pusher_shard_config.should_handle(self._instance_name, user_id): return None - resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) + pusher_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( + user_id, app_id, pushkey + ) - pusher_config = None - for r in resultlist: - if r.user_name == user_id: - pusher_config = r + if pusher_config and not pusher_config.enabled: + self.maybe_stop_pusher(app_id, pushkey, user_id) + return None pusher = None if pusher_config: @@ -305,7 +334,7 @@ class PusherPool: async def _start_pushers(self) -> None: """Start all the pushers""" - pushers = await self.store.get_all_pushers() + pushers = await self.store.get_enabled_pushers() # Stagger starting up the pushers so we don't completely drown the # process on start up. @@ -363,6 +392,8 @@ class PusherPool: synapse_pushers.labels(type(pusher).__name__, pusher.app_id).inc() + logger.info("Starting pusher %s / %s", pusher.user_id, appid_pushkey) + # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to # push. @@ -382,16 +413,7 @@ class PusherPool: return pusher async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: - appid_pushkey = "%s:%s" % (app_id, pushkey) - - byuser = self.pushers.get(user_id, {}) - - if appid_pushkey in byuser: - logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) - pusher = byuser.pop(appid_pushkey) - pusher.on_stop() - - synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() + self.maybe_stop_pusher(app_id, pushkey, user_id) # We can only delete pushers on master. if self._remove_pusher_client: @@ -402,3 +424,22 @@ class PusherPool: await self.store.delete_pusher_by_app_id_pushkey_user_id( app_id, pushkey, user_id ) + + def maybe_stop_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: + """Stops a pusher with the given app ID and push key if one is running. + + Args: + app_id: the pusher's app ID. + pushkey: the pusher's push key. + user_id: the user the pusher belongs to. Only used for logging. + """ + appid_pushkey = "%s:%s" % (app_id, pushkey) + + byuser = self.pushers.get(user_id, {}) + + if appid_pushkey in byuser: + logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) + pusher = byuser.pop(appid_pushkey) + pusher.on_stop() + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e4f2201c92..cf9cd6833b 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -189,7 +189,9 @@ class ReplicationDataHandler: if row.deleted: self.stop_pusher(row.user_id, row.app_id, row.pushkey) else: - await self.start_pusher(row.user_id, row.app_id, row.pushkey) + await self.process_pusher_change( + row.user_id, row.app_id, row.pushkey + ) elif stream_name == EventsStream.NAME: # We shouldn't get multiple rows per token for events stream, so # we don't need to optimise this for multiple rows. @@ -334,13 +336,15 @@ class ReplicationDataHandler: logger.info("Stopping pusher %r / %r", user_id, key) pusher.on_stop() - async def start_pusher(self, user_id: str, app_id: str, pushkey: str) -> None: + async def process_pusher_change( + self, user_id: str, app_id: str, pushkey: str + ) -> None: if not self._notify_pushers: return key = "%s:%s" % (app_id, pushkey) logger.info("Starting pusher %r / %r", user_id, key) - await self._pusher_pool.start_pusher_by_id(app_id, pushkey, user_id) + await self._pusher_pool.process_pusher_change_by_id(app_id, pushkey, user_id) class FederationSenderHandler: diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 2ca6b2d08a..1274773d7e 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -375,7 +375,7 @@ class UserRestServletV2(RestServlet): and self.hs.config.email.email_notif_for_new_users and medium == "email" ): - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user_id, access_token=None, kind="email", @@ -383,7 +383,7 @@ class UserRestServletV2(RestServlet): app_display_name="Email Notifications", device_display_name=address, pushkey=address, - lang=None, # We don't know a user's language here + lang=None, data={}, ) diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py index 9a1f10f4be..c9f76125dc 100644 --- a/synapse/rest/client/pusher.py +++ b/synapse/rest/client/pusher.py @@ -42,6 +42,7 @@ class PushersRestServlet(RestServlet): super().__init__() self.hs = hs self.auth = hs.get_auth() + self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) @@ -51,9 +52,14 @@ class PushersRestServlet(RestServlet): user.to_string() ) - filtered_pushers = [p.as_dict() for p in pushers] + pusher_dicts = [p.as_dict() for p in pushers] - return 200, {"pushers": filtered_pushers} + for pusher in pusher_dicts: + if self._msc3881_enabled: + pusher["org.matrix.msc3881.enabled"] = pusher["enabled"] + del pusher["enabled"] + + return 200, {"pushers": pusher_dicts} class PushersSetRestServlet(RestServlet): @@ -65,6 +71,7 @@ class PushersSetRestServlet(RestServlet): self.auth = hs.get_auth() self.notifier = hs.get_notifier() self.pusher_pool = self.hs.get_pusherpool() + self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) @@ -103,6 +110,10 @@ class PushersSetRestServlet(RestServlet): if "append" in content: append = content["append"] + enabled = True + if self._msc3881_enabled and "org.matrix.msc3881.enabled" in content: + enabled = content["org.matrix.msc3881.enabled"] + if not append: await self.pusher_pool.remove_pushers_by_app_id_and_pushkey_not_user( app_id=content["app_id"], @@ -111,7 +122,7 @@ class PushersSetRestServlet(RestServlet): ) try: - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user.to_string(), access_token=requester.access_token_id, kind=content["kind"], @@ -122,6 +133,7 @@ class PushersSetRestServlet(RestServlet): lang=content["lang"], data=content["data"], profile_tag=content.get("profile_tag", ""), + enabled=enabled, ) except PusherConfigException as pce: raise SynapseError( diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index bd0cfa7f32..ee55b8c4a9 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -89,6 +89,11 @@ class PusherWorkerStore(SQLBaseStore): ) continue + # If we're using SQLite, then boolean values are integers. This is + # troublesome since some code using the return value of this method might + # expect it to be a boolean, or will expose it to clients (in responses). + r["enabled"] = bool(r["enabled"]) + yield PusherConfig(**r) async def get_pushers_by_app_id_and_pushkey( @@ -100,38 +105,52 @@ class PusherWorkerStore(SQLBaseStore): return await self.get_pushers_by({"user_name": user_id}) async def get_pushers_by(self, keyvalues: Dict[str, Any]) -> Iterator[PusherConfig]: - ret = await self.db_pool.simple_select_list( - "pushers", - keyvalues, - [ - "id", - "user_name", - "access_token", - "profile_tag", - "kind", - "app_id", - "app_display_name", - "device_display_name", - "pushkey", - "ts", - "lang", - "data", - "last_stream_ordering", - "last_success", - "failing_since", - ], + """Retrieve pushers that match the given criteria. + + Args: + keyvalues: A {column: value} dictionary. + + Returns: + The pushers for which the given columns have the given values. + """ + + def get_pushers_by_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]: + # We could technically use simple_select_list here, but we need to call + # COALESCE on the 'enabled' column. While it is technically possible to give + # simple_select_list the whole `COALESCE(...) AS ...` as a column name, it + # feels a bit hacky, so it's probably better to just inline the query. + sql = """ + SELECT + id, user_name, access_token, profile_tag, kind, app_id, + app_display_name, device_display_name, pushkey, ts, lang, data, + last_stream_ordering, last_success, failing_since, + COALESCE(enabled, TRUE) AS enabled + FROM pushers + """ + + sql += "WHERE %s" % (" AND ".join("%s = ?" % (k,) for k in keyvalues),) + + txn.execute(sql, list(keyvalues.values())) + + return self.db_pool.cursor_to_dict(txn) + + ret = await self.db_pool.runInteraction( desc="get_pushers_by", + func=get_pushers_by_txn, ) + return self._decode_pushers_rows(ret) - async def get_all_pushers(self) -> Iterator[PusherConfig]: - def get_pushers(txn: LoggingTransaction) -> Iterator[PusherConfig]: - txn.execute("SELECT * FROM pushers") + async def get_enabled_pushers(self) -> Iterator[PusherConfig]: + def get_enabled_pushers_txn(txn: LoggingTransaction) -> Iterator[PusherConfig]: + txn.execute("SELECT * FROM pushers WHERE COALESCE(enabled, TRUE)") rows = self.db_pool.cursor_to_dict(txn) return self._decode_pushers_rows(rows) - return await self.db_pool.runInteraction("get_all_pushers", get_pushers) + return await self.db_pool.runInteraction( + "get_enabled_pushers", get_enabled_pushers_txn + ) async def get_all_updated_pushers_rows( self, instance_name: str, last_id: int, current_id: int, limit: int @@ -476,6 +495,7 @@ class PusherStore(PusherWorkerStore): data: Optional[JsonDict], last_stream_ordering: int, profile_tag: str = "", + enabled: bool = True, ) -> None: async with self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on @@ -494,6 +514,7 @@ class PusherStore(PusherWorkerStore): "last_stream_ordering": last_stream_ordering, "profile_tag": profile_tag, "id": stream_id, + "enabled": enabled, }, desc="add_pusher", lock=False, diff --git a/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql new file mode 100644 index 0000000000..dba3b4900b --- /dev/null +++ b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql @@ -0,0 +1,16 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE pushers ADD COLUMN enabled BOOLEAN; \ No newline at end of file diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 7a3b0d6755..fd14568f55 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -114,7 +114,7 @@ class EmailPusherTests(HomeserverTestCase): ) self.pusher = self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, access_token=self.token_id, kind="email", @@ -136,7 +136,7 @@ class EmailPusherTests(HomeserverTestCase): """ with self.assertRaises(SynapseError) as cm: self.get_success_or_raise( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, access_token=self.token_id, kind="email", diff --git a/tests/push/test_http.py b/tests/push/test_http.py index d9c68cdd2d..af67d84463 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -19,8 +19,8 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin from synapse.logging.context import make_deferred_yieldable -from synapse.push import PusherConfigException -from synapse.rest.client import login, push_rule, receipts, room +from synapse.push import PusherConfig, PusherConfigException +from synapse.rest.client import login, push_rule, pusher, receipts, room from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock @@ -35,6 +35,7 @@ class HTTPPusherTests(HomeserverTestCase): login.register_servlets, receipts.register_servlets, push_rule.register_servlets, + pusher.register_servlets, ] user_id = True hijack_auth = False @@ -74,7 +75,7 @@ class HTTPPusherTests(HomeserverTestCase): def test_data(data: Optional[JsonDict]) -> None: self.get_failure( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -119,7 +120,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -235,7 +236,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -355,7 +356,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -441,7 +442,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -518,7 +519,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -624,7 +625,7 @@ class HTTPPusherTests(HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -728,18 +729,38 @@ class HTTPPusherTests(HomeserverTestCase): ) self.assertEqual(channel.code, 200, channel.json_body) - def _make_user_with_pusher(self, username: str) -> Tuple[str, str]: + def _make_user_with_pusher( + self, username: str, enabled: bool = True + ) -> Tuple[str, str]: + """Registers a user and creates a pusher for them. + + Args: + username: the localpart of the new user's Matrix ID. + enabled: whether to create the pusher in an enabled or disabled state. + """ user_id = self.register_user(username, "pass") access_token = self.login(username, "pass") # Register the pusher + self._set_pusher(user_id, access_token, enabled) + + return user_id, access_token + + def _set_pusher(self, user_id: str, access_token: str, enabled: bool) -> None: + """Creates or updates the pusher for the given user. + + Args: + user_id: the user's Matrix ID. + access_token: the access token associated with the pusher. + enabled: whether to enable or disable the pusher. + """ user_tuple = self.get_success( self.hs.get_datastores().main.get_user_by_access_token(access_token) ) token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -749,11 +770,10 @@ class HTTPPusherTests(HomeserverTestCase): pushkey="a@example.com", lang=None, data={"url": "http://example.com/_matrix/push/v1/notify"}, + enabled=enabled, ) ) - return user_id, access_token - def test_dont_notify_rule_overrides_message(self) -> None: """ The override push rule will suppress notification @@ -791,3 +811,105 @@ class HTTPPusherTests(HomeserverTestCase): # The user sends a message back (sends a notification) self.helper.send(room, body="Hello", tok=access_token) self.assertEqual(len(self.push_attempts), 1) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_disable(self) -> None: + """Tests that disabling a pusher means it's not pushed to anymore.""" + user_id, access_token = self._make_user_with_pusher("user") + other_user_id, other_access_token = self._make_user_with_pusher("otheruser") + + room = self.helper.create_room_as(user_id, tok=access_token) + self.helper.join(room=room, user=other_user_id, tok=other_access_token) + + # Send a message and check that it generated a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Disable the pusher. + self._set_pusher(user_id, access_token, enabled=False) + + # Send another message and check that it did not generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Get the pushers for the user and check that it is marked as disabled. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + + enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"] + self.assertFalse(enabled) + self.assertTrue(isinstance(enabled, bool)) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_enable(self) -> None: + """Tests that enabling a disabled pusher means it gets pushed to.""" + # Create the user with the pusher already disabled. + user_id, access_token = self._make_user_with_pusher("user", enabled=False) + other_user_id, other_access_token = self._make_user_with_pusher("otheruser") + + room = self.helper.create_room_as(user_id, tok=access_token) + self.helper.join(room=room, user=other_user_id, tok=other_access_token) + + # Send a message and check that it did not generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 0) + + # Enable the pusher. + self._set_pusher(user_id, access_token, enabled=True) + + # Send another message and check that it did generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Get the pushers for the user and check that it is marked as enabled. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + + enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"] + self.assertTrue(enabled) + self.assertTrue(isinstance(enabled, bool)) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_null_enabled(self) -> None: + """Tests that a pusher that has an 'enabled' column set to NULL (eg pushers + created before the column was introduced) is considered enabled. + """ + # We intentionally set 'enabled' to None so that it's stored as NULL in the + # database. + user_id, access_token = self._make_user_with_pusher("user", enabled=None) # type: ignore[arg-type] + + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + self.assertTrue(channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"]) + + def test_update_different_device_access_token(self) -> None: + """Tests that if we create a pusher from one device, the update it from another + device, the access token associated with the pusher stays the same. + """ + # Create a user with a pusher. + user_id, access_token = self._make_user_with_pusher("user") + + # Get the token ID for the current access token, since that's what we store in + # the pushers table. + user_tuple = self.get_success( + self.hs.get_datastores().main.get_user_by_access_token(access_token) + ) + token_id = user_tuple.token_id + + # Generate a new access token, and update the pusher with it. + new_token = self.login("user", "pass") + self._set_pusher(user_id, new_token, enabled=False) + + # Get the current list of pushers for the user. + ret = self.get_success( + self.hs.get_datastores().main.get_pushers_by({"user_name": user_id}) + ) + pushers: List[PusherConfig] = list(ret) + + # Check that we still have one pusher, and that the access token associated with + # it didn't change. + self.assertEqual(len(pushers), 1) + self.assertEqual(pushers[0].access_token, token_id) diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py index 8f4f6688ce..59fea93e49 100644 --- a/tests/replication/test_pusher_shard.py +++ b/tests/replication/test_pusher_shard.py @@ -55,7 +55,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase): token_id = user_dict.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 9f536ceeb3..1847e6ad6b 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2839,7 +2839,7 @@ class PushersRestTestCase(unittest.HomeserverTestCase): token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.other_user, access_token=token_id, kind="http", -- cgit 1.5.1 From ccca14140a019c2e0430f95d78fa075efd8d535f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 21 Sep 2022 16:31:53 +0100 Subject: Track device IDs for pushers (#13831) Second half of the MSC3881 implementation --- changelog.d/13831.feature | 1 + synapse/push/__init__.py | 2 + synapse/push/pusherpool.py | 10 ++- synapse/rest/client/pusher.py | 3 + synapse/storage/databases/main/pusher.py | 73 +++++++++++++++++++++- .../schema/main/delta/73/03pusher_device_id.sql | 20 ++++++ tests/push/test_http.py | 55 ++++++++++++++-- 7 files changed, 154 insertions(+), 10 deletions(-) create mode 100644 changelog.d/13831.feature create mode 100644 synapse/storage/schema/main/delta/73/03pusher_device_id.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13831.feature b/changelog.d/13831.feature new file mode 100644 index 0000000000..6c8e5cffe2 --- /dev/null +++ b/changelog.d/13831.feature @@ -0,0 +1 @@ +Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881). diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index ac99d35a7e..a0c760239d 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -117,6 +117,7 @@ class PusherConfig: last_success: Optional[int] failing_since: Optional[int] enabled: bool + device_id: Optional[str] def as_dict(self) -> Dict[str, Any]: """Information that can be retrieved about a pusher after creation.""" @@ -130,6 +131,7 @@ class PusherConfig: "profile_tag": self.profile_tag, "pushkey": self.pushkey, "enabled": self.enabled, + "device_id": self.device_id, } diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 2597898cf4..e2648cbc93 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -107,6 +107,7 @@ class PusherPool: data: JsonDict, profile_tag: str = "", enabled: bool = True, + device_id: Optional[str] = None, ) -> Optional[Pusher]: """Creates a new pusher and adds it to the pool @@ -149,18 +150,20 @@ class PusherPool: last_success=None, failing_since=None, enabled=enabled, + device_id=device_id, ) ) # Before we actually persist the pusher, we check if the user already has one - # for this app ID and pushkey. If so, we want to keep the access token in place, - # since this could be one device modifying (e.g. enabling/disabling) another - # device's pusher. + # this app ID and pushkey. If so, we want to keep the access token and device ID + # in place, since this could be one device modifying (e.g. enabling/disabling) + # another device's pusher. existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( user_id, app_id, pushkey ) if existing_config: access_token = existing_config.access_token + device_id = existing_config.device_id await self.store.add_pusher( user_id=user_id, @@ -176,6 +179,7 @@ class PusherPool: last_stream_ordering=last_stream_ordering, profile_tag=profile_tag, enabled=enabled, + device_id=device_id, ) pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id) diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py index c9f76125dc..975eef2144 100644 --- a/synapse/rest/client/pusher.py +++ b/synapse/rest/client/pusher.py @@ -57,7 +57,9 @@ class PushersRestServlet(RestServlet): for pusher in pusher_dicts: if self._msc3881_enabled: pusher["org.matrix.msc3881.enabled"] = pusher["enabled"] + pusher["org.matrix.msc3881.device_id"] = pusher["device_id"] del pusher["enabled"] + del pusher["device_id"] return 200, {"pushers": pusher_dicts} @@ -134,6 +136,7 @@ class PushersSetRestServlet(RestServlet): data=content["data"], profile_tag=content.get("profile_tag", ""), enabled=enabled, + device_id=requester.device_id, ) except PusherConfigException as pce: raise SynapseError( diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index ee55b8c4a9..01206950a9 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -124,7 +124,7 @@ class PusherWorkerStore(SQLBaseStore): id, user_name, access_token, profile_tag, kind, app_id, app_display_name, device_display_name, pushkey, ts, lang, data, last_stream_ordering, last_success, failing_since, - COALESCE(enabled, TRUE) AS enabled + COALESCE(enabled, TRUE) AS enabled, device_id FROM pushers """ @@ -477,7 +477,74 @@ class PusherWorkerStore(SQLBaseStore): return number_deleted -class PusherStore(PusherWorkerStore): +class PusherBackgroundUpdatesStore(SQLBaseStore): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_update_handler( + "set_device_id_for_pushers", self._set_device_id_for_pushers + ) + + async def _set_device_id_for_pushers( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to populate the device_id column of the pushers table.""" + last_pusher_id = progress.get("pusher_id", 0) + + def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int: + txn.execute( + """ + SELECT p.id, at.device_id + FROM pushers AS p + INNER JOIN access_tokens AS at + ON p.access_token = at.id + WHERE + p.access_token IS NOT NULL + AND at.device_id IS NOT NULL + AND p.id > ? + ORDER BY p.id + LIMIT ? + """, + (last_pusher_id, batch_size), + ) + + rows = self.db_pool.cursor_to_dict(txn) + if len(rows) == 0: + return 0 + + self.db_pool.simple_update_many_txn( + txn=txn, + table="pushers", + key_names=("id",), + key_values=[(row["id"],) for row in rows], + value_names=("device_id",), + value_values=[(row["device_id"],) for row in rows], + ) + + self.db_pool.updates._background_update_progress_txn( + txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["id"]} + ) + + return len(rows) + + nb_processed = await self.db_pool.runInteraction( + "set_device_id_for_pushers", set_device_id_for_pushers_txn + ) + + if nb_processed < batch_size: + await self.db_pool.updates._end_background_update( + "set_device_id_for_pushers" + ) + + return nb_processed + + +class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore): def get_pushers_stream_token(self) -> int: return self._pushers_id_gen.get_current_token() @@ -496,6 +563,7 @@ class PusherStore(PusherWorkerStore): last_stream_ordering: int, profile_tag: str = "", enabled: bool = True, + device_id: Optional[str] = None, ) -> None: async with self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on @@ -515,6 +583,7 @@ class PusherStore(PusherWorkerStore): "profile_tag": profile_tag, "id": stream_id, "enabled": enabled, + "device_id": device_id, }, desc="add_pusher", lock=False, diff --git a/synapse/storage/schema/main/delta/73/03pusher_device_id.sql b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql new file mode 100644 index 0000000000..1b4ffbeebe --- /dev/null +++ b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a device_id column to track the device ID that created the pusher. It's NULLable +-- on purpose, because a) it might not be possible to track down the device that created +-- old pushers (pushers.access_token and access_tokens.device_id are both NULLable), and +-- b) access tokens retrieved via the admin API don't have a device associated to them. +ALTER TABLE pushers ADD COLUMN device_id TEXT; \ No newline at end of file diff --git a/tests/push/test_http.py b/tests/push/test_http.py index af67d84463..b383b8401f 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -22,6 +22,7 @@ from synapse.logging.context import make_deferred_yieldable from synapse.push import PusherConfig, PusherConfigException from synapse.rest.client import login, push_rule, pusher, receipts, room from synapse.server import HomeServer +from synapse.storage.databases.main.registration import TokenLookupResult from synapse.types import JsonDict from synapse.util import Clock @@ -771,6 +772,7 @@ class HTTPPusherTests(HomeserverTestCase): lang=None, data={"url": "http://example.com/_matrix/push/v1/notify"}, enabled=enabled, + device_id=user_tuple.device_id, ) ) @@ -885,19 +887,21 @@ class HTTPPusherTests(HomeserverTestCase): self.assertEqual(len(channel.json_body["pushers"]), 1) self.assertTrue(channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"]) - def test_update_different_device_access_token(self) -> None: + def test_update_different_device_access_token_device_id(self) -> None: """Tests that if we create a pusher from one device, the update it from another - device, the access token associated with the pusher stays the same. + device, the access token and device ID associated with the pusher stays the + same. """ # Create a user with a pusher. user_id, access_token = self._make_user_with_pusher("user") # Get the token ID for the current access token, since that's what we store in - # the pushers table. + # the pushers table. Also get the device ID from it. user_tuple = self.get_success( self.hs.get_datastores().main.get_user_by_access_token(access_token) ) token_id = user_tuple.token_id + device_id = user_tuple.device_id # Generate a new access token, and update the pusher with it. new_token = self.login("user", "pass") @@ -909,7 +913,48 @@ class HTTPPusherTests(HomeserverTestCase): ) pushers: List[PusherConfig] = list(ret) - # Check that we still have one pusher, and that the access token associated with - # it didn't change. + # Check that we still have one pusher, and that the access token and device ID + # associated with it didn't change. self.assertEqual(len(pushers), 1) self.assertEqual(pushers[0].access_token, token_id) + self.assertEqual(pushers[0].device_id, device_id) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_device_id(self) -> None: + """Tests that a pusher created with a given device ID shows that device ID in + GET /pushers requests. + """ + self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # We create the pusher with an HTTP request rather than with + # _make_user_with_pusher so that we can test the device ID is correctly set when + # creating a pusher via an API call. + self.make_request( + method="POST", + path="/pushers/set", + content={ + "kind": "http", + "app_id": "m.http", + "app_display_name": "HTTP Push Notifications", + "device_display_name": "pushy push", + "pushkey": "a@example.com", + "lang": "en", + "data": {"url": "http://example.com/_matrix/push/v1/notify"}, + }, + access_token=access_token, + ) + + # Look up the user info for the access token so we can compare the device ID. + lookup_result: TokenLookupResult = self.get_success( + self.hs.get_datastores().main.get_user_by_access_token(access_token) + ) + + # Get the user's devices and check it has the correct device ID. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + self.assertEqual( + channel.json_body["pushers"][0]["org.matrix.msc3881.device_id"], + lookup_result.device_id, + ) -- cgit 1.5.1 From 0a38c7ec6d46b6e51bfa53ff44e51637d3c63f5c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 26 Sep 2022 18:28:32 +0100 Subject: Snapshot schema 72 (#13873) Including another batch of fixes to the schema dump script --- changelog.d/13873.misc | 1 + scripts-dev/make_full_schema.sh | 60 +- synapse/storage/database.py | 8 + synapse/storage/engines/_base.py | 23 +- synapse/storage/engines/postgres.py | 12 +- synapse/storage/engines/sqlite.py | 21 +- synapse/storage/prepare_database.py | 8 +- .../common/full_schemas/72/full.sql.postgres | 8 + .../schema/common/full_schemas/72/full.sql.sqlite | 6 + .../schema/main/full_schemas/72/full.sql.postgres | 1344 ++++++++++++++++++++ .../schema/main/full_schemas/72/full.sql.sqlite | 646 ++++++++++ .../schema/state/full_schemas/72/full.sql.postgres | 30 + .../schema/state/full_schemas/72/full.sql.sqlite | 20 + 13 files changed, 2165 insertions(+), 22 deletions(-) create mode 100644 changelog.d/13873.misc create mode 100644 synapse/storage/schema/common/full_schemas/72/full.sql.postgres create mode 100644 synapse/storage/schema/common/full_schemas/72/full.sql.sqlite create mode 100644 synapse/storage/schema/main/full_schemas/72/full.sql.postgres create mode 100644 synapse/storage/schema/main/full_schemas/72/full.sql.sqlite create mode 100644 synapse/storage/schema/state/full_schemas/72/full.sql.postgres create mode 100644 synapse/storage/schema/state/full_schemas/72/full.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/13873.misc b/changelog.d/13873.misc new file mode 100644 index 0000000000..f4342482f0 --- /dev/null +++ b/changelog.d/13873.misc @@ -0,0 +1 @@ +Create a new snapshot of the database schema. diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index d8cd06ee4f..e2bc1640bb 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -26,6 +26,9 @@ usage() { echo " Defaults to 9999." echo "-h" echo " Display this help text." + echo "" + echo " NB: make sure to run this against the *oldest* supported version of postgres," + echo " or else pg_dump might output non-backwards-compatible syntax." } SCHEMA_NUMBER="9999" @@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat; echo "Dumping SQLite3 schema..." -mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER" -sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" -sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" -sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" -sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" -sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" -sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" cleanup_pg_schema() { - sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' + # Cleanup as follows: + # - Remove empty lines. pg_dump likes to output a lot of these. + # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually + # separate tables etc. + # - Remove "public." prefix --- the schema name. + # - Remove "SET" commands. Last time I ran this, the output commands were + # SET statement_timeout = 0; + # SET lock_timeout = 0; + # SET idle_in_transaction_session_timeout = 0; + # SET client_encoding = 'UTF8'; + # SET standard_conforming_strings = on; + # SET check_function_bodies = false; + # SET xmloption = content; + # SET client_min_messages = warning; + # SET row_security = off; + # SET default_table_access_method = heap; + # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all + # SELECT statements because some of those have side-effects which we do want in the + # schema. Last time I ran this, the only SELECTS were + # SELECT pg_catalog.set_config('search_path', '', false); + # and + # SELECT pg_catalog.setval(text, bigint, bool); + # We do want to remove the former, but the latter is important. If the last argument + # is `true` or omitted, this marks the given integer as having been consumed and + # will NOT appear as the nextval. + sed -e '/^$/d' \ + -e '/^--/d' \ + -e 's/public\.//g' \ + -e '/^SET /d' \ + -e '/^SELECT pg_catalog.set_config/d' } echo "Dumping Postgres schema..." -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! Files dumped to: $OUTPUT_DIR" diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 9d116f6925..6cc88aad32 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -393,6 +393,14 @@ class LoggingTransaction: def executemany(self, sql: str, *args: Any) -> None: self._do_execute(self.txn.executemany, sql, *args) + def executescript(self, sql: str) -> None: + if isinstance(self.database_engine, Sqlite3Engine): + self._do_execute(self.txn.executescript, sql) # type: ignore[attr-defined] + else: + raise NotImplementedError( + f"executescript only exists for sqlite driver, not {type(self.database_engine)}" + ) + def _make_sql_one_line(self, sql: str) -> str: "Strip newlines out of SQL so that the loggers in the DB are on one line" return " ".join(line.strip() for line in sql.splitlines() if line.strip()) diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index 0d16a419a4..70e594a68f 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError): ConnectionType = TypeVar("ConnectionType", bound=Connection) +CursorType = TypeVar("CursorType", bound=Cursor) -class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): +class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta): def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]): self.module = module @@ -64,7 +65,7 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): ... @abc.abstractmethod - def check_new_database(self, txn: Cursor) -> None: + def check_new_database(self, txn: CursorType) -> None: """Gets called when setting up a brand new database. This allows us to apply stricter checks on new databases versus existing database. """ @@ -124,3 +125,21 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default. """ ... + + @staticmethod + @abc.abstractmethod + def executescript(cursor: CursorType, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + This is not provided by DBAPI2, and so needs engine-specific support. + """ + ... + + @classmethod + def execute_script_file(cls, cursor: CursorType, filepath: str) -> None: + """Execute a file containing multiple semicolon-delimited SQL statements. + + This is not provided by DBAPI2, and so needs engine-specific support. + """ + with open(filepath, "rt") as f: + cls.executescript(cursor, f.read()) diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 7f7d006ac2..d8c0f64d9a 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -31,7 +31,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]): +class PostgresEngine( + BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor] +): def __init__(self, database_config: Mapping[str, Any]): super().__init__(psycopg2, database_config) psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) @@ -212,3 +214,11 @@ class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]): else: isolation_level = self.isolation_level_map[isolation_level] return conn.set_isolation_level(isolation_level) + + @staticmethod + def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + Psycopg2 seems happy to do this in DBAPI2's `execute()` function. + """ + cursor.execute(script) diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 095ae0a096..faa574dbfd 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -24,7 +24,7 @@ if TYPE_CHECKING: from synapse.storage.database import LoggingDatabaseConnection -class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): +class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]): def __init__(self, database_config: Mapping[str, Any]): super().__init__(sqlite3, database_config) @@ -120,6 +120,25 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): # All transactions are SERIALIZABLE by default in sqlite pass + @staticmethod + def executescript(cursor: sqlite3.Cursor, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + Python's built-in SQLite driver does not allow you to do this with DBAPI2's + `execute`: + + > execute() will only execute a single SQL statement. If you try to execute more + > than one statement with it, it will raise a Warning. Use executescript() if + > you want to execute multiple SQL statements with one call. + + Though the docs for `executescript` warn: + + > If there is a pending transaction, an implicit COMMIT statement is executed + > first. No other implicit transaction control is performed; any transaction + > control must be added to sql_script. + """ + cursor.executescript(script) + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 09a2b58f4c..3acdb39da7 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -266,7 +266,7 @@ def _setup_new_database( ".sql." + specific ): logger.debug("Applying schema %s", entry.absolute_path) - executescript(cur, entry.absolute_path) + database_engine.execute_script_file(cur, entry.absolute_path) cur.execute( "INSERT INTO schema_version (version, upgraded) VALUES (?,?)", @@ -517,7 +517,7 @@ def _upgrade_existing_database( UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path ) logger.info("Applying schema %s", relative_path) - executescript(cur, absolute_path) + database_engine.execute_script_file(cur, absolute_path) elif ext == specific_engine_extension and root_name.endswith(".sql"): # A .sql file specific to our engine; just read and execute it if is_worker: @@ -525,7 +525,7 @@ def _upgrade_existing_database( UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path ) logger.info("Applying engine-specific schema %s", relative_path) - executescript(cur, absolute_path) + database_engine.execute_script_file(cur, absolute_path) elif ext in specific_engine_extensions and root_name.endswith(".sql"): # A .sql file for a different engine; skip it. continue @@ -666,7 +666,7 @@ def _get_or_create_schema_state( ) -> Optional[_SchemaState]: # Bluntly try creating the schema_version tables. sql_path = os.path.join(schema_path, "common", "schema_version.sql") - executescript(txn, sql_path) + database_engine.execute_script_file(txn, sql_path) txn.execute("SELECT version, upgraded FROM schema_version") row = txn.fetchone() diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.postgres b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres new file mode 100644 index 0000000000..f0e546f052 --- /dev/null +++ b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres @@ -0,0 +1,8 @@ +CREATE TABLE background_updates ( + update_name text NOT NULL, + progress_json text NOT NULL, + depends_on text, + ordering integer DEFAULT 0 NOT NULL +); +ALTER TABLE ONLY background_updates + ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name); diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite new file mode 100644 index 0000000000..d5a2c04aa9 --- /dev/null +++ b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite @@ -0,0 +1,6 @@ +CREATE TABLE background_updates ( + update_name text NOT NULL, + progress_json text NOT NULL, + depends_on text, ordering INT NOT NULL DEFAULT 0, + CONSTRAINT background_updates_uniqueness UNIQUE (update_name) +); diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.postgres b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres new file mode 100644 index 0000000000..d421fd9ab9 --- /dev/null +++ b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres @@ -0,0 +1,1344 @@ +CREATE FUNCTION check_partial_state_events() RETURNS trigger + LANGUAGE plpgsql + AS $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM events + WHERE events.event_id = NEW.event_id + AND events.room_id != NEW.room_id + ) THEN + RAISE EXCEPTION 'Incorrect room_id in partial_state_events'; + END IF; + RETURN NEW; + END; + $$; +CREATE TABLE access_tokens ( + id bigint NOT NULL, + user_id text NOT NULL, + device_id text, + token text NOT NULL, + valid_until_ms bigint, + puppets_user_id text, + last_validated bigint, + refresh_token_id bigint, + used boolean +); +CREATE TABLE account_data ( + user_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL, + instance_name text +); +CREATE SEQUENCE account_data_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE account_validity ( + user_id text NOT NULL, + expiration_ts_ms bigint NOT NULL, + email_sent boolean NOT NULL, + renewal_token text, + token_used_ts_ms bigint +); +CREATE TABLE application_services_state ( + as_id text NOT NULL, + state character varying(5), + read_receipt_stream_id bigint, + presence_stream_id bigint, + to_device_stream_id bigint, + device_list_stream_id bigint +); +CREATE SEQUENCE application_services_txn_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE application_services_txns ( + as_id text NOT NULL, + txn_id bigint NOT NULL, + event_ids text NOT NULL +); +CREATE TABLE appservice_room_list ( + appservice_id text NOT NULL, + network_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE appservice_stream_position ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint, + CONSTRAINT appservice_stream_position_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE batch_events ( + event_id text NOT NULL, + room_id text NOT NULL, + batch_id text NOT NULL +); +CREATE TABLE blocked_rooms ( + room_id text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE cache_invalidation_stream_by_instance ( + stream_id bigint NOT NULL, + instance_name text NOT NULL, + cache_func text NOT NULL, + keys text[], + invalidation_ts bigint +); +CREATE SEQUENCE cache_invalidation_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE current_state_delta_stream ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text, + prev_event_id text, + instance_name text +); +CREATE TABLE current_state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + membership text +); +CREATE TABLE dehydrated_devices ( + user_id text NOT NULL, + device_id text NOT NULL, + device_data text NOT NULL +); +CREATE TABLE deleted_pushers ( + stream_id bigint NOT NULL, + app_id text NOT NULL, + pushkey text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE destination_rooms ( + destination text NOT NULL, + room_id text NOT NULL, + stream_ordering bigint NOT NULL +); +CREATE TABLE destinations ( + destination text NOT NULL, + retry_last_ts bigint, + retry_interval bigint, + failure_ts bigint, + last_successful_stream_ordering bigint +); +CREATE TABLE device_auth_providers ( + user_id text NOT NULL, + device_id text NOT NULL, + auth_provider_id text NOT NULL, + auth_provider_session_id text NOT NULL +); +CREATE TABLE device_federation_inbox ( + origin text NOT NULL, + message_id text NOT NULL, + received_ts bigint NOT NULL, + instance_name text +); +CREATE TABLE device_federation_outbox ( + destination text NOT NULL, + stream_id bigint NOT NULL, + queued_ts bigint NOT NULL, + messages_json text NOT NULL, + instance_name text +); +CREATE TABLE device_inbox ( + user_id text NOT NULL, + device_id text NOT NULL, + stream_id bigint NOT NULL, + message_json text NOT NULL, + instance_name text +); +CREATE SEQUENCE device_inbox_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE device_lists_changes_in_room ( + user_id text NOT NULL, + device_id text NOT NULL, + room_id text NOT NULL, + stream_id bigint NOT NULL, + converted_to_destinations boolean NOT NULL, + opentracing_context text +); +CREATE TABLE device_lists_outbound_last_success ( + destination text NOT NULL, + user_id text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE device_lists_outbound_pokes ( + destination text NOT NULL, + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL, + sent boolean NOT NULL, + ts bigint NOT NULL, + opentracing_context text +); +CREATE TABLE device_lists_remote_cache ( + user_id text NOT NULL, + device_id text NOT NULL, + content text NOT NULL +); +CREATE TABLE device_lists_remote_extremeties ( + user_id text NOT NULL, + stream_id text NOT NULL +); +CREATE TABLE device_lists_remote_resync ( + user_id text NOT NULL, + added_ts bigint NOT NULL +); +CREATE TABLE device_lists_stream ( + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL +); +CREATE TABLE devices ( + user_id text NOT NULL, + device_id text NOT NULL, + display_name text, + last_seen bigint, + ip text, + user_agent text, + hidden boolean DEFAULT false +); +CREATE TABLE e2e_cross_signing_keys ( + user_id text NOT NULL, + keytype text NOT NULL, + keydata text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE e2e_cross_signing_signatures ( + user_id text NOT NULL, + key_id text NOT NULL, + target_user_id text NOT NULL, + target_device_id text NOT NULL, + signature text NOT NULL +); +CREATE TABLE e2e_device_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); +CREATE TABLE e2e_fallback_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + algorithm text NOT NULL, + key_id text NOT NULL, + key_json text NOT NULL, + used boolean DEFAULT false NOT NULL +); +CREATE TABLE e2e_one_time_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + algorithm text NOT NULL, + key_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); +CREATE TABLE e2e_room_keys ( + user_id text NOT NULL, + room_id text NOT NULL, + session_id text NOT NULL, + version bigint NOT NULL, + first_message_index integer, + forwarded_count integer, + is_verified boolean, + session_data text NOT NULL +); +CREATE TABLE e2e_room_keys_versions ( + user_id text NOT NULL, + version bigint NOT NULL, + algorithm text NOT NULL, + auth_data text NOT NULL, + deleted smallint DEFAULT 0 NOT NULL, + etag bigint +); +CREATE TABLE erased_users ( + user_id text NOT NULL +); +CREATE TABLE event_auth ( + event_id text NOT NULL, + auth_id text NOT NULL, + room_id text NOT NULL +); +CREATE SEQUENCE event_auth_chain_id + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE event_auth_chain_links ( + origin_chain_id bigint NOT NULL, + origin_sequence_number bigint NOT NULL, + target_chain_id bigint NOT NULL, + target_sequence_number bigint NOT NULL +); +CREATE TABLE event_auth_chain_to_calculate ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL +); +CREATE TABLE event_auth_chains ( + event_id text NOT NULL, + chain_id bigint NOT NULL, + sequence_number bigint NOT NULL +); +CREATE TABLE event_backward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE event_edges ( + event_id text NOT NULL, + prev_event_id text NOT NULL, + room_id text, + is_state boolean DEFAULT false NOT NULL +); +CREATE TABLE event_expiry ( + event_id text NOT NULL, + expiry_ts bigint NOT NULL +); +CREATE TABLE event_forward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE event_json ( + event_id text NOT NULL, + room_id text NOT NULL, + internal_metadata text NOT NULL, + json text NOT NULL, + format_version integer +); +CREATE TABLE event_labels ( + event_id text NOT NULL, + label text NOT NULL, + room_id text NOT NULL, + topological_ordering bigint NOT NULL +); +CREATE TABLE event_push_actions ( + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + profile_tag character varying(32), + actions text NOT NULL, + topological_ordering bigint, + stream_ordering bigint, + notif smallint, + highlight smallint, + unread smallint, + thread_id text +); +CREATE TABLE event_push_actions_staging ( + event_id text NOT NULL, + user_id text NOT NULL, + actions text NOT NULL, + notif smallint NOT NULL, + highlight smallint NOT NULL, + unread smallint, + thread_id text +); +CREATE TABLE event_push_summary ( + user_id text NOT NULL, + room_id text NOT NULL, + notif_count bigint NOT NULL, + stream_ordering bigint NOT NULL, + unread_count bigint, + last_receipt_stream_ordering bigint, + thread_id text +); +CREATE TABLE event_push_summary_last_receipt_stream_id ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint NOT NULL, + CONSTRAINT event_push_summary_last_receipt_stream_id_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE event_push_summary_stream_ordering ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint NOT NULL, + CONSTRAINT event_push_summary_stream_ordering_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE event_relations ( + event_id text NOT NULL, + relates_to_id text NOT NULL, + relation_type text NOT NULL, + aggregation_key text +); +CREATE TABLE event_reports ( + id bigint NOT NULL, + received_ts bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + reason text, + content text +); +CREATE TABLE event_search ( + event_id text, + room_id text, + sender text, + key text, + vector tsvector, + origin_server_ts bigint, + stream_ordering bigint +); +CREATE TABLE event_to_state_groups ( + event_id text NOT NULL, + state_group bigint NOT NULL +); +CREATE TABLE event_txn_id ( + event_id text NOT NULL, + room_id text NOT NULL, + user_id text NOT NULL, + token_id bigint NOT NULL, + txn_id text NOT NULL, + inserted_ts bigint NOT NULL +); +CREATE TABLE events ( + topological_ordering bigint NOT NULL, + event_id text NOT NULL, + type text NOT NULL, + room_id text NOT NULL, + content text, + unrecognized_keys text, + processed boolean NOT NULL, + outlier boolean NOT NULL, + depth bigint DEFAULT 0 NOT NULL, + origin_server_ts bigint, + received_ts bigint, + sender text, + contains_url boolean, + instance_name text, + stream_ordering bigint, + state_key text, + rejection_reason text +); +CREATE SEQUENCE events_backfill_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE SEQUENCE events_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE ex_outlier_stream ( + event_stream_ordering bigint NOT NULL, + event_id text NOT NULL, + state_group bigint NOT NULL, + instance_name text +); +CREATE TABLE federation_inbound_events_staging ( + origin text NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL, + received_ts bigint NOT NULL, + event_json text NOT NULL, + internal_metadata text NOT NULL +); +CREATE TABLE federation_stream_position ( + type text NOT NULL, + stream_id bigint NOT NULL, + instance_name text DEFAULT 'master'::text NOT NULL +); +CREATE TABLE ignored_users ( + ignorer_user_id text NOT NULL, + ignored_user_id text NOT NULL +); +CREATE TABLE insertion_event_edges ( + event_id text NOT NULL, + room_id text NOT NULL, + insertion_prev_event_id text NOT NULL +); +CREATE TABLE insertion_event_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE insertion_events ( + event_id text NOT NULL, + room_id text NOT NULL, + next_batch_id text NOT NULL +); +CREATE TABLE instance_map ( + instance_id integer NOT NULL, + instance_name text NOT NULL +); +CREATE SEQUENCE instance_map_instance_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE instance_map_instance_id_seq OWNED BY instance_map.instance_id; +CREATE TABLE local_current_membership ( + room_id text NOT NULL, + user_id text NOT NULL, + event_id text NOT NULL, + membership text NOT NULL +); +CREATE TABLE local_media_repository ( + media_id text, + media_type text, + media_length integer, + created_ts bigint, + upload_name text, + user_id text, + quarantined_by text, + url_cache text, + last_access_ts bigint, + safe_from_quarantine boolean DEFAULT false NOT NULL +); +CREATE TABLE local_media_repository_thumbnails ( + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_type text, + thumbnail_method text, + thumbnail_length integer +); +CREATE TABLE local_media_repository_url_cache ( + url text, + response_code integer, + etag text, + expires_ts bigint, + og text, + media_id text, + download_ts bigint +); +CREATE TABLE monthly_active_users ( + user_id text NOT NULL, + "timestamp" bigint NOT NULL +); +CREATE TABLE open_id_tokens ( + token text NOT NULL, + ts_valid_until_ms bigint NOT NULL, + user_id text NOT NULL +); +CREATE TABLE partial_state_events ( + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE partial_state_rooms ( + room_id text NOT NULL +); +CREATE TABLE partial_state_rooms_servers ( + room_id text NOT NULL, + server_name text NOT NULL +); +CREATE TABLE presence ( + user_id text NOT NULL, + state character varying(20), + status_msg text, + mtime bigint +); +CREATE TABLE presence_stream ( + stream_id bigint, + user_id text, + state text, + last_active_ts bigint, + last_federation_update_ts bigint, + last_user_sync_ts bigint, + status_msg text, + currently_active boolean, + instance_name text +); +CREATE SEQUENCE presence_stream_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE profiles ( + user_id text NOT NULL, + displayname text, + avatar_url text +); +CREATE TABLE push_rules ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + priority_class smallint NOT NULL, + priority integer DEFAULT 0 NOT NULL, + conditions text NOT NULL, + actions text NOT NULL +); +CREATE TABLE push_rules_enable ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + enabled smallint +); +CREATE TABLE push_rules_stream ( + stream_id bigint NOT NULL, + event_stream_ordering bigint NOT NULL, + user_id text NOT NULL, + rule_id text NOT NULL, + op text NOT NULL, + priority_class smallint, + priority integer, + conditions text, + actions text +); +CREATE TABLE pusher_throttle ( + pusher bigint NOT NULL, + room_id text NOT NULL, + last_sent_ts bigint, + throttle_ms bigint +); +CREATE TABLE pushers ( + id bigint NOT NULL, + user_name text NOT NULL, + access_token bigint, + profile_tag text NOT NULL, + kind text NOT NULL, + app_id text NOT NULL, + app_display_name text NOT NULL, + device_display_name text NOT NULL, + pushkey text NOT NULL, + ts bigint NOT NULL, + lang text, + data text, + last_stream_ordering bigint, + last_success bigint, + failing_since bigint +); +CREATE TABLE ratelimit_override ( + user_id text NOT NULL, + messages_per_second bigint, + burst_count bigint +); +CREATE TABLE receipts_graph ( + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_ids text NOT NULL, + data text NOT NULL, + thread_id text +); +CREATE TABLE receipts_linearized ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_id text NOT NULL, + data text NOT NULL, + instance_name text, + event_stream_ordering bigint, + thread_id text +); +CREATE SEQUENCE receipts_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE received_transactions ( + transaction_id text, + origin text, + ts bigint, + response_code integer, + response_json bytea, + has_been_referenced smallint DEFAULT 0 +); +CREATE TABLE redactions ( + event_id text NOT NULL, + redacts text NOT NULL, + have_censored boolean DEFAULT false NOT NULL, + received_ts bigint +); +CREATE TABLE refresh_tokens ( + id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL, + token text NOT NULL, + next_token_id bigint, + expiry_ts bigint, + ultimate_session_expiry_ts bigint +); +CREATE TABLE registration_tokens ( + token text NOT NULL, + uses_allowed integer, + pending integer NOT NULL, + completed integer NOT NULL, + expiry_time bigint +); +CREATE TABLE rejections ( + event_id text NOT NULL, + reason text NOT NULL, + last_check text NOT NULL +); +CREATE TABLE remote_media_cache ( + media_origin text, + media_id text, + media_type text, + created_ts bigint, + upload_name text, + media_length integer, + filesystem_id text, + last_access_ts bigint, + quarantined_by text +); +CREATE TABLE remote_media_cache_thumbnails ( + media_origin text, + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_method text, + thumbnail_type text, + thumbnail_length integer, + filesystem_id text +); +CREATE TABLE room_account_data ( + user_id text NOT NULL, + room_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL, + instance_name text +); +CREATE TABLE room_alias_servers ( + room_alias text NOT NULL, + server text NOT NULL +); +CREATE TABLE room_aliases ( + room_alias text NOT NULL, + room_id text NOT NULL, + creator text +); +CREATE TABLE room_depth ( + room_id text NOT NULL, + min_depth bigint +); +CREATE TABLE room_memberships ( + event_id text NOT NULL, + user_id text NOT NULL, + sender text NOT NULL, + room_id text NOT NULL, + membership text NOT NULL, + forgotten integer DEFAULT 0, + display_name text, + avatar_url text +); +CREATE TABLE room_retention ( + room_id text NOT NULL, + event_id text NOT NULL, + min_lifetime bigint, + max_lifetime bigint +); +CREATE TABLE room_stats_current ( + room_id text NOT NULL, + current_state_events integer NOT NULL, + joined_members integer NOT NULL, + invited_members integer NOT NULL, + left_members integer NOT NULL, + banned_members integer NOT NULL, + local_users_in_room integer NOT NULL, + completed_delta_stream_id bigint NOT NULL, + knocked_members integer +); +CREATE TABLE room_stats_earliest_token ( + room_id text NOT NULL, + token bigint NOT NULL +); +CREATE TABLE room_stats_state ( + room_id text NOT NULL, + name text, + canonical_alias text, + join_rules text, + history_visibility text, + encryption text, + avatar text, + guest_access text, + is_federatable boolean, + topic text, + room_type text +); +CREATE TABLE room_tags ( + user_id text NOT NULL, + room_id text NOT NULL, + tag text NOT NULL, + content text NOT NULL +); +CREATE TABLE room_tags_revisions ( + user_id text NOT NULL, + room_id text NOT NULL, + stream_id bigint NOT NULL, + instance_name text +); +CREATE TABLE rooms ( + room_id text NOT NULL, + is_public boolean, + creator text, + room_version text, + has_auth_chain_index boolean +); +CREATE TABLE server_keys_json ( + server_name text NOT NULL, + key_id text NOT NULL, + from_server text NOT NULL, + ts_added_ms bigint NOT NULL, + ts_valid_until_ms bigint NOT NULL, + key_json bytea NOT NULL +); +CREATE TABLE server_signature_keys ( + server_name text, + key_id text, + from_server text, + ts_added_ms bigint, + verify_key bytea, + ts_valid_until_ms bigint +); +CREATE TABLE sessions ( + session_type text NOT NULL, + session_id text NOT NULL, + value text NOT NULL, + expiry_time_ms bigint NOT NULL +); +CREATE TABLE state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + prev_state text +); +CREATE TABLE stats_incremental_position ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint NOT NULL, + CONSTRAINT stats_incremental_position_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE stream_ordering_to_exterm ( + stream_ordering bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE stream_positions ( + stream_name text NOT NULL, + instance_name text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE threepid_guest_access_tokens ( + medium text, + address text, + guest_access_token text, + first_inviter text +); +CREATE TABLE threepid_validation_session ( + session_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + client_secret text NOT NULL, + last_send_attempt bigint NOT NULL, + validated_at bigint +); +CREATE TABLE threepid_validation_token ( + token text NOT NULL, + session_id text NOT NULL, + next_link text, + expires bigint NOT NULL +); +CREATE TABLE ui_auth_sessions ( + session_id text NOT NULL, + creation_time bigint NOT NULL, + serverdict text NOT NULL, + clientdict text NOT NULL, + uri text NOT NULL, + method text NOT NULL, + description text NOT NULL +); +CREATE TABLE ui_auth_sessions_credentials ( + session_id text NOT NULL, + stage_type text NOT NULL, + result text NOT NULL +); +CREATE TABLE ui_auth_sessions_ips ( + session_id text NOT NULL, + ip text NOT NULL, + user_agent text NOT NULL +); +CREATE TABLE user_daily_visits ( + user_id text NOT NULL, + device_id text, + "timestamp" bigint NOT NULL, + user_agent text +); +CREATE TABLE user_directory ( + user_id text NOT NULL, + room_id text, + display_name text, + avatar_url text +); +CREATE TABLE user_directory_search ( + user_id text NOT NULL, + vector tsvector +); +CREATE TABLE user_directory_stream_pos ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint, + CONSTRAINT user_directory_stream_pos_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE user_external_ids ( + auth_provider text NOT NULL, + external_id text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE user_filters ( + user_id text NOT NULL, + filter_id bigint NOT NULL, + filter_json bytea NOT NULL +); +CREATE SEQUENCE user_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE user_ips ( + user_id text NOT NULL, + access_token text NOT NULL, + device_id text, + ip text NOT NULL, + user_agent text NOT NULL, + last_seen bigint NOT NULL +); +CREATE TABLE user_signature_stream ( + stream_id bigint NOT NULL, + from_user_id text NOT NULL, + user_ids text NOT NULL +); +CREATE TABLE user_stats_current ( + user_id text NOT NULL, + joined_rooms bigint NOT NULL, + completed_delta_stream_id bigint NOT NULL +); +CREATE TABLE user_threepid_id_server ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + id_server text NOT NULL +); +CREATE TABLE user_threepids ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + validated_at bigint NOT NULL, + added_at bigint NOT NULL +); +CREATE TABLE users ( + name text, + password_hash text, + creation_ts bigint, + admin smallint DEFAULT 0 NOT NULL, + upgrade_ts bigint, + is_guest smallint DEFAULT 0 NOT NULL, + appservice_id text, + consent_version text, + consent_server_notice_sent text, + user_type text, + deactivated smallint DEFAULT 0 NOT NULL, + shadow_banned boolean, + consent_ts bigint +); +CREATE TABLE users_in_public_rooms ( + user_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE users_pending_deactivation ( + user_id text NOT NULL +); +CREATE TABLE users_to_send_full_presence_to ( + user_id text NOT NULL, + presence_stream_id bigint +); +CREATE TABLE users_who_share_private_rooms ( + user_id text NOT NULL, + other_user_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE worker_locks ( + lock_name text NOT NULL, + lock_key text NOT NULL, + instance_name text NOT NULL, + token text NOT NULL, + last_renewed_ts bigint NOT NULL +); +ALTER TABLE ONLY instance_map ALTER COLUMN instance_id SET DEFAULT nextval('instance_map_instance_id_seq'::regclass); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_pkey PRIMARY KEY (id); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY account_data + ADD CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type); +ALTER TABLE ONLY account_validity + ADD CONSTRAINT account_validity_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY application_services_state + ADD CONSTRAINT application_services_state_pkey PRIMARY KEY (as_id); +ALTER TABLE ONLY application_services_txns + ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id); +ALTER TABLE ONLY appservice_stream_position + ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock); +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_room_id_type_state_key_key UNIQUE (room_id, type, state_key); +ALTER TABLE ONLY dehydrated_devices + ADD CONSTRAINT dehydrated_devices_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_pkey PRIMARY KEY (destination, room_id); +ALTER TABLE ONLY destinations + ADD CONSTRAINT destinations_pkey PRIMARY KEY (destination); +ALTER TABLE ONLY devices + ADD CONSTRAINT device_uniqueness UNIQUE (user_id, device_id); +ALTER TABLE ONLY e2e_device_keys_json + ADD CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id); +ALTER TABLE ONLY e2e_fallback_keys_json + ADD CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm); +ALTER TABLE ONLY e2e_one_time_keys_json + ADD CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id); +ALTER TABLE ONLY event_auth_chain_to_calculate + ADD CONSTRAINT event_auth_chain_to_calculate_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_auth_chains + ADD CONSTRAINT event_auth_chains_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_backward_extremities + ADD CONSTRAINT event_backward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); +ALTER TABLE ONLY event_expiry + ADD CONSTRAINT event_expiry_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_forward_extremities + ADD CONSTRAINT event_forward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); +ALTER TABLE ONLY event_push_actions + ADD CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag); +ALTER TABLE ONLY event_json + ADD CONSTRAINT event_json_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY event_labels + ADD CONSTRAINT event_labels_pkey PRIMARY KEY (event_id, label); +ALTER TABLE ONLY event_push_summary_last_receipt_stream_id + ADD CONSTRAINT event_push_summary_last_receipt_stream_id_lock_key UNIQUE (lock); +ALTER TABLE ONLY event_push_summary_stream_ordering + ADD CONSTRAINT event_push_summary_stream_ordering_lock_key UNIQUE (lock); +ALTER TABLE ONLY event_reports + ADD CONSTRAINT event_reports_pkey PRIMARY KEY (id); +ALTER TABLE ONLY event_to_state_groups + ADD CONSTRAINT event_to_state_groups_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY events + ADD CONSTRAINT events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY ex_outlier_stream + ADD CONSTRAINT ex_outlier_stream_pkey PRIMARY KEY (event_stream_ordering); +ALTER TABLE ONLY instance_map + ADD CONSTRAINT instance_map_pkey PRIMARY KEY (instance_id); +ALTER TABLE ONLY local_media_repository + ADD CONSTRAINT local_media_repository_media_id_key UNIQUE (media_id); +ALTER TABLE ONLY user_threepids + ADD CONSTRAINT medium_address UNIQUE (medium, address); +ALTER TABLE ONLY open_id_tokens + ADD CONSTRAINT open_id_tokens_pkey PRIMARY KEY (token); +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY partial_state_rooms + ADD CONSTRAINT partial_state_rooms_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY partial_state_rooms_servers + ADD CONSTRAINT partial_state_rooms_servers_room_id_server_name_key UNIQUE (room_id, server_name); +ALTER TABLE ONLY presence + ADD CONSTRAINT presence_user_id_key UNIQUE (user_id); +ALTER TABLE ONLY profiles + ADD CONSTRAINT profiles_user_id_key UNIQUE (user_id); +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_pkey PRIMARY KEY (id); +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_user_name_rule_id_key UNIQUE (user_name, rule_id); +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_pkey PRIMARY KEY (id); +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_user_name_rule_id_key UNIQUE (user_name, rule_id); +ALTER TABLE ONLY pusher_throttle + ADD CONSTRAINT pusher_throttle_pkey PRIMARY KEY (pusher, room_id); +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_app_id_pushkey_user_name_key UNIQUE (app_id, pushkey, user_name); +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_pkey PRIMARY KEY (id); +ALTER TABLE ONLY receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id); +ALTER TABLE ONLY receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id); +ALTER TABLE ONLY receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id); +ALTER TABLE ONLY receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id); +ALTER TABLE ONLY received_transactions + ADD CONSTRAINT received_transactions_transaction_id_origin_key UNIQUE (transaction_id, origin); +ALTER TABLE ONLY redactions + ADD CONSTRAINT redactions_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_pkey PRIMARY KEY (id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY registration_tokens + ADD CONSTRAINT registration_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY rejections + ADD CONSTRAINT rejections_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY remote_media_cache + ADD CONSTRAINT remote_media_cache_media_origin_media_id_key UNIQUE (media_origin, media_id); +ALTER TABLE ONLY room_account_data + ADD CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type); +ALTER TABLE ONLY room_aliases + ADD CONSTRAINT room_aliases_room_alias_key UNIQUE (room_alias); +ALTER TABLE ONLY room_depth + ADD CONSTRAINT room_depth_room_id_key UNIQUE (room_id); +ALTER TABLE ONLY room_memberships + ADD CONSTRAINT room_memberships_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY room_retention + ADD CONSTRAINT room_retention_pkey PRIMARY KEY (room_id, event_id); +ALTER TABLE ONLY room_stats_current + ADD CONSTRAINT room_stats_current_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY room_tags_revisions + ADD CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id); +ALTER TABLE ONLY room_tags + ADD CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag); +ALTER TABLE ONLY rooms + ADD CONSTRAINT rooms_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY server_keys_json + ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server); +ALTER TABLE ONLY server_signature_keys + ADD CONSTRAINT server_signature_keys_server_name_key_id_key UNIQUE (server_name, key_id); +ALTER TABLE ONLY sessions + ADD CONSTRAINT sessions_session_type_session_id_key UNIQUE (session_type, session_id); +ALTER TABLE ONLY state_events + ADD CONSTRAINT state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY stats_incremental_position + ADD CONSTRAINT stats_incremental_position_lock_key UNIQUE (lock); +ALTER TABLE ONLY threepid_validation_session + ADD CONSTRAINT threepid_validation_session_pkey PRIMARY KEY (session_id); +ALTER TABLE ONLY threepid_validation_token + ADD CONSTRAINT threepid_validation_token_pkey PRIMARY KEY (token); +ALTER TABLE ONLY ui_auth_sessions_credentials + ADD CONSTRAINT ui_auth_sessions_credentials_session_id_stage_type_key UNIQUE (session_id, stage_type); +ALTER TABLE ONLY ui_auth_sessions_ips + ADD CONSTRAINT ui_auth_sessions_ips_session_id_ip_user_agent_key UNIQUE (session_id, ip, user_agent); +ALTER TABLE ONLY ui_auth_sessions + ADD CONSTRAINT ui_auth_sessions_session_id_key UNIQUE (session_id); +ALTER TABLE ONLY user_directory_stream_pos + ADD CONSTRAINT user_directory_stream_pos_lock_key UNIQUE (lock); +ALTER TABLE ONLY user_external_ids + ADD CONSTRAINT user_external_ids_auth_provider_external_id_key UNIQUE (auth_provider, external_id); +ALTER TABLE ONLY user_stats_current + ADD CONSTRAINT user_stats_current_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY users + ADD CONSTRAINT users_name_key UNIQUE (name); +ALTER TABLE ONLY users_to_send_full_presence_to + ADD CONSTRAINT users_to_send_full_presence_to_pkey PRIMARY KEY (user_id); +CREATE INDEX access_tokens_device_id ON access_tokens USING btree (user_id, device_id); +CREATE INDEX account_data_stream_id ON account_data USING btree (user_id, stream_id); +CREATE INDEX application_services_txns_id ON application_services_txns USING btree (as_id); +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list USING btree (appservice_id, network_id, room_id); +CREATE INDEX batch_events_batch_id ON batch_events USING btree (batch_id); +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms USING btree (room_id); +CREATE UNIQUE INDEX cache_invalidation_stream_by_instance_id ON cache_invalidation_stream_by_instance USING btree (stream_id); +CREATE INDEX cache_invalidation_stream_by_instance_instance_index ON cache_invalidation_stream_by_instance USING btree (instance_name, stream_id); +CREATE UNIQUE INDEX chunk_events_event_id ON batch_events USING btree (event_id); +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING btree (stream_id); +CREATE INDEX current_state_events_member_index ON current_state_events USING btree (state_key) WHERE (type = 'm.room.member'::text); +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id); +CREATE INDEX destination_rooms_room_id ON destination_rooms USING btree (room_id); +CREATE INDEX device_auth_providers_devices ON device_auth_providers USING btree (user_id, device_id); +CREATE INDEX device_auth_providers_sessions ON device_auth_providers USING btree (auth_provider_id, auth_provider_session_id); +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox USING btree (origin, message_id); +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox USING btree (destination, stream_id); +CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree (stream_id); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox USING btree (stream_id, user_id); +CREATE INDEX device_inbox_user_stream_id ON device_inbox USING btree (user_id, device_id, stream_id); +CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room USING btree (stream_id, room_id); +CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room USING btree (stream_id) WHERE (NOT converted_to_destinations); +CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON device_lists_outbound_last_success USING btree (destination, user_id); +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes USING btree (destination, stream_id); +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes USING btree (stream_id); +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USING btree (destination, user_id); +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache USING btree (user_id, device_id); +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties USING btree (user_id); +CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync USING btree (user_id); +CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync USING btree (added_ts); +CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream USING btree (user_id, device_id); +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys USING btree (user_id, keytype, stream_id); +CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys USING btree (stream_id); +CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures USING btree (user_id, target_user_id, target_device_id); +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions USING btree (user_id, version); +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys USING btree (user_id, version, room_id, session_id); +CREATE UNIQUE INDEX erased_users_user ON erased_users USING btree (user_id); +CREATE INDEX ev_b_extrem_id ON event_backward_extremities USING btree (event_id); +CREATE INDEX ev_b_extrem_room ON event_backward_extremities USING btree (room_id); +CREATE INDEX ev_edges_prev_id ON event_edges USING btree (prev_event_id); +CREATE INDEX ev_extrem_id ON event_forward_extremities USING btree (event_id); +CREATE INDEX ev_extrem_room ON event_forward_extremities USING btree (room_id); +CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id); +CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links USING btree (origin_chain_id, target_chain_id); +CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate USING btree (room_id); +CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains USING btree (chain_id, sequence_number); +CREATE INDEX event_contains_url_index ON events USING btree (room_id, topological_ordering, stream_ordering) WHERE ((contains_url = true) AND (outlier = false)); +CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx ON event_edges USING btree (event_id, prev_event_id); +CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry USING btree (expiry_ts); +CREATE INDEX event_labels_room_id_label_idx ON event_labels USING btree (room_id, label, topological_ordering); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering) WHERE (highlight = 1); +CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering); +CREATE INDEX event_push_actions_room_id_user_id ON event_push_actions USING btree (room_id, user_id); +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging USING btree (event_id); +CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions USING btree (highlight, stream_ordering) WHERE (highlight = 0); +CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btree (stream_ordering, user_id); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions USING btree (user_id, stream_ordering); +CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary USING btree (user_id, room_id); +CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary USING btree (user_id, room_id, thread_id); +CREATE UNIQUE INDEX event_relations_id ON event_relations USING btree (event_id); +CREATE INDEX event_relations_relates ON event_relations USING btree (relates_to_id, relation_type, aggregation_key); +CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id); +CREATE UNIQUE INDEX event_search_event_id_idx ON event_search USING btree (event_id); +CREATE INDEX event_search_fts_idx ON event_search USING gin (vector); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups USING btree (state_group); +CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id USING btree (event_id); +CREATE INDEX event_txn_id_ts ON event_txn_id USING btree (inserted_ts); +CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id USING btree (room_id, user_id, token_id, txn_id); +CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering); +CREATE INDEX events_room_stream ON events USING btree (room_id, stream_ordering); +CREATE UNIQUE INDEX events_stream_ordering ON events USING btree (stream_ordering); +CREATE INDEX events_ts ON events USING btree (origin_server_ts, stream_ordering); +CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging USING btree (origin, event_id); +CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging USING btree (room_id, received_ts); +CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position USING btree (type, instance_name); +CREATE INDEX ignored_users_ignored_user_id ON ignored_users USING btree (ignored_user_id); +CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users USING btree (ignorer_user_id, ignored_user_id); +CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges USING btree (event_id); +CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges USING btree (insertion_prev_event_id); +CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges USING btree (room_id); +CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities USING btree (event_id); +CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities USING btree (room_id); +CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events USING btree (event_id); +CREATE INDEX insertion_events_next_batch_id ON insertion_events USING btree (next_batch_id); +CREATE UNIQUE INDEX instance_map_idx ON instance_map USING btree (instance_name); +CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership USING btree (user_id, room_id); +CREATE INDEX local_current_membership_room_idx ON local_current_membership USING btree (room_id); +CREATE UNIQUE INDEX local_media_repository_thumbn_media_id_width_height_method_key ON local_media_repository_thumbnails USING btree (media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method); +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails USING btree (media_id); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache USING btree (url, download_ts); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache USING btree (expires_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache USING btree (media_id); +CREATE INDEX local_media_repository_url_idx ON local_media_repository USING btree (created_ts) WHERE (url_cache IS NOT NULL); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp"); +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users USING btree (user_id); +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens USING btree (ts_valid_until_ms); +CREATE INDEX partial_state_events_room_id_idx ON partial_state_events USING btree (room_id); +CREATE INDEX presence_stream_id ON presence_stream USING btree (stream_id, user_id); +CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream USING btree (state) WHERE (state <> 'offline'::text); +CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id); +CREATE INDEX public_room_index ON rooms USING btree (is_public); +CREATE INDEX push_rules_enable_user_name ON push_rules_enable USING btree (user_name); +CREATE INDEX push_rules_stream_id ON push_rules_stream USING btree (stream_id); +CREATE INDEX push_rules_stream_user_stream_id ON push_rules_stream USING btree (user_id, stream_id); +CREATE INDEX push_rules_user_name ON push_rules USING btree (user_name); +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override USING btree (user_id); +CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL); +CREATE INDEX receipts_linearized_id ON receipts_linearized USING btree (stream_id); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized USING btree (room_id, stream_id); +CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL); +CREATE INDEX receipts_linearized_user ON receipts_linearized USING btree (user_id); +CREATE INDEX received_transactions_ts ON received_transactions USING btree (ts); +CREATE INDEX redactions_have_censored_ts ON redactions USING btree (received_ts) WHERE (NOT have_censored); +CREATE INDEX redactions_redacts ON redactions USING btree (redacts); +CREATE INDEX refresh_tokens_next_token_id ON refresh_tokens USING btree (next_token_id) WHERE (next_token_id IS NOT NULL); +CREATE UNIQUE INDEX remote_media_repository_thumbn_media_origin_id_width_height_met ON remote_media_cache_thumbnails USING btree (media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method); +CREATE INDEX room_account_data_stream_id ON room_account_data USING btree (user_id, stream_id); +CREATE INDEX room_alias_servers_alias ON room_alias_servers USING btree (room_alias); +CREATE INDEX room_aliases_id ON room_aliases USING btree (room_id); +CREATE INDEX room_memberships_room_id ON room_memberships USING btree (room_id); +CREATE INDEX room_memberships_user_id ON room_memberships USING btree (user_id); +CREATE INDEX room_memberships_user_room_forgotten ON room_memberships USING btree (user_id, room_id) WHERE (forgotten = 1); +CREATE INDEX room_retention_max_lifetime_idx ON room_retention USING btree (max_lifetime); +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token USING btree (room_id); +CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state USING btree (room_id); +CREATE INDEX stream_ordering_to_exterm_idx ON stream_ordering_to_exterm USING btree (stream_ordering); +CREATE INDEX stream_ordering_to_exterm_rm_idx ON stream_ordering_to_exterm USING btree (room_id, stream_ordering); +CREATE UNIQUE INDEX stream_positions_idx ON stream_positions USING btree (stream_name, instance_name); +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens USING btree (medium, address); +CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token USING btree (session_id); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits USING btree ("timestamp"); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits USING btree (user_id, "timestamp"); +CREATE INDEX user_directory_room_idx ON user_directory USING btree (room_id); +CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin (vector); +CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search USING btree (user_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory USING btree (user_id); +CREATE INDEX user_external_ids_user_id_idx ON user_external_ids USING btree (user_id); +CREATE UNIQUE INDEX user_filters_unique ON user_filters USING btree (user_id, filter_id); +CREATE INDEX user_ips_device_id ON user_ips USING btree (user_id, device_id, last_seen); +CREATE INDEX user_ips_last_seen ON user_ips USING btree (user_id, last_seen); +CREATE INDEX user_ips_last_seen_only ON user_ips USING btree (last_seen); +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips USING btree (user_id, access_token, ip); +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream USING btree (stream_id); +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server USING btree (user_id, medium, address, id_server); +CREATE INDEX user_threepids_medium_address ON user_threepids USING btree (medium, address); +CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id); +CREATE INDEX users_creation_ts ON users USING btree (creation_ts); +CREATE INDEX users_have_local_media ON local_media_repository USING btree (user_id, created_ts); +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms USING btree (room_id); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms USING btree (other_user_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms USING btree (room_id); +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id); +CREATE UNIQUE INDEX worker_locks_key ON worker_locks USING btree (lock_name, lock_key); +CREATE TRIGGER check_partial_state_events BEFORE INSERT OR UPDATE ON partial_state_events FOR EACH ROW EXECUTE PROCEDURE check_partial_state_events(); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_refresh_token_id_fkey FOREIGN KEY (refresh_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_destination_fkey FOREIGN KEY (destination) REFERENCES destinations(destination); +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id); +ALTER TABLE ONLY event_edges + ADD CONSTRAINT event_edges_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id); +ALTER TABLE ONLY event_txn_id + ADD CONSTRAINT event_txn_id_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id) ON DELETE CASCADE; +ALTER TABLE ONLY event_txn_id + ADD CONSTRAINT event_txn_id_token_id_fkey FOREIGN KEY (token_id) REFERENCES access_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id); +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id); +ALTER TABLE ONLY partial_state_rooms + ADD CONSTRAINT partial_state_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id); +ALTER TABLE ONLY partial_state_rooms_servers + ADD CONSTRAINT partial_state_rooms_servers_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_next_token_id_fkey FOREIGN KEY (next_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY ui_auth_sessions_credentials + ADD CONSTRAINT ui_auth_sessions_credentials_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id); +ALTER TABLE ONLY ui_auth_sessions_ips + ADD CONSTRAINT ui_auth_sessions_ips_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id); +ALTER TABLE ONLY users_to_send_full_presence_to + ADD CONSTRAINT users_to_send_full_presence_to_user_id_fkey FOREIGN KEY (user_id) REFERENCES users(name); +INSERT INTO appservice_stream_position VALUES ('X', 0); +INSERT INTO event_push_summary_last_receipt_stream_id VALUES ('X', 0); +INSERT INTO event_push_summary_stream_ordering VALUES ('X', 0); +INSERT INTO federation_stream_position VALUES ('federation', -1, 'master'); +INSERT INTO federation_stream_position VALUES ('events', -1, 'master'); +INSERT INTO stats_incremental_position VALUES ('X', 1); +INSERT INTO user_directory_stream_pos VALUES ('X', 1); +SELECT pg_catalog.setval('account_data_sequence', 1, true); +SELECT pg_catalog.setval('application_services_txn_id_seq', 1, false); +SELECT pg_catalog.setval('cache_invalidation_stream_seq', 1, true); +SELECT pg_catalog.setval('device_inbox_sequence', 1, true); +SELECT pg_catalog.setval('event_auth_chain_id', 1, false); +SELECT pg_catalog.setval('events_backfill_stream_seq', 1, true); +SELECT pg_catalog.setval('events_stream_seq', 1, true); +SELECT pg_catalog.setval('instance_map_instance_id_seq', 1, false); +SELECT pg_catalog.setval('presence_stream_sequence', 1, true); +SELECT pg_catalog.setval('receipts_sequence', 1, true); +SELECT pg_catalog.setval('user_id_seq', 1, false); diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite new file mode 100644 index 0000000000..d403baf1fb --- /dev/null +++ b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite @@ -0,0 +1,646 @@ +CREATE TABLE application_services_txns( as_id TEXT NOT NULL, txn_id INTEGER NOT NULL, event_ids TEXT NOT NULL, UNIQUE(as_id, txn_id) ); +CREATE INDEX application_services_txns_id ON application_services_txns ( as_id ); +CREATE TABLE presence( user_id TEXT NOT NULL, state VARCHAR(20), status_msg TEXT, mtime BIGINT, UNIQUE (user_id) ); +CREATE TABLE users( name TEXT, password_hash TEXT, creation_ts BIGINT, admin SMALLINT DEFAULT 0 NOT NULL, upgrade_ts BIGINT, is_guest SMALLINT DEFAULT 0 NOT NULL, appservice_id TEXT, consent_version TEXT, consent_server_notice_sent TEXT, user_type TEXT DEFAULT NULL, deactivated SMALLINT DEFAULT 0 NOT NULL, shadow_banned BOOLEAN, consent_ts bigint, UNIQUE(name) ); +CREATE TABLE user_ips ( user_id TEXT NOT NULL, access_token TEXT NOT NULL, device_id TEXT, ip TEXT NOT NULL, user_agent TEXT NOT NULL, last_seen BIGINT NOT NULL ); +CREATE TABLE profiles( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, UNIQUE(user_id) ); +CREATE TABLE received_transactions( transaction_id TEXT, origin TEXT, ts BIGINT, response_code INTEGER, response_json bytea, has_been_referenced smallint default 0, UNIQUE (transaction_id, origin) ); +CREATE TABLE destinations( destination TEXT PRIMARY KEY, retry_last_ts BIGINT, retry_interval INTEGER , failure_ts BIGINT, last_successful_stream_ordering BIGINT); +CREATE TABLE events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, content TEXT, unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, depth BIGINT DEFAULT 0 NOT NULL, origin_server_ts BIGINT, received_ts BIGINT, sender TEXT, contains_url BOOLEAN, instance_name TEXT, state_key TEXT DEFAULT NULL, rejection_reason TEXT DEFAULT NULL, UNIQUE (event_id) ); +CREATE INDEX events_order_room ON events ( room_id, topological_ordering, stream_ordering ); +CREATE TABLE event_json( event_id TEXT NOT NULL, room_id TEXT NOT NULL, internal_metadata TEXT NOT NULL, json TEXT NOT NULL, format_version INTEGER, UNIQUE (event_id) ); +CREATE TABLE state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, prev_state TEXT, UNIQUE (event_id) ); +CREATE TABLE current_state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, membership TEXT, UNIQUE (event_id), UNIQUE (room_id, type, state_key) ); +CREATE TABLE room_memberships( event_id TEXT NOT NULL, user_id TEXT NOT NULL, sender TEXT NOT NULL, room_id TEXT NOT NULL, membership TEXT NOT NULL, forgotten INTEGER DEFAULT 0, display_name TEXT, avatar_url TEXT, UNIQUE (event_id) ); +CREATE INDEX room_memberships_room_id ON room_memberships (room_id); +CREATE INDEX room_memberships_user_id ON room_memberships (user_id); +CREATE TABLE rooms( room_id TEXT PRIMARY KEY NOT NULL, is_public BOOL, creator TEXT , room_version TEXT, has_auth_chain_index BOOLEAN); +CREATE TABLE server_signature_keys( server_name TEXT, key_id TEXT, from_server TEXT, ts_added_ms BIGINT, verify_key bytea, ts_valid_until_ms BIGINT, UNIQUE (server_name, key_id) ); +CREATE TABLE rejections( event_id TEXT NOT NULL, reason TEXT NOT NULL, last_check TEXT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE push_rules ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, priority_class SMALLINT NOT NULL, priority INTEGER NOT NULL DEFAULT 0, conditions TEXT NOT NULL, actions TEXT NOT NULL, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_user_name on push_rules (user_name); +CREATE TABLE push_rules_enable ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, enabled SMALLINT, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name); +CREATE TABLE event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id); +CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id); +CREATE TABLE event_backward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id); +CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id); +CREATE TABLE room_depth( room_id TEXT NOT NULL, min_depth INTEGER NOT NULL, UNIQUE (room_id) ); +CREATE TABLE event_to_state_groups( event_id TEXT NOT NULL, state_group BIGINT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE local_media_repository ( media_id TEXT, media_type TEXT, media_length INTEGER, created_ts BIGINT, upload_name TEXT, user_id TEXT, quarantined_by TEXT, url_cache TEXT, last_access_ts BIGINT, safe_from_quarantine BOOLEAN NOT NULL DEFAULT 0, UNIQUE (media_id) ); +CREATE TABLE remote_media_cache ( media_origin TEXT, media_id TEXT, media_type TEXT, created_ts BIGINT, upload_name TEXT, media_length INTEGER, filesystem_id TEXT, last_access_ts BIGINT, quarantined_by TEXT, UNIQUE (media_origin, media_id) ); +CREATE TABLE redactions ( event_id TEXT NOT NULL, redacts TEXT NOT NULL, have_censored BOOL NOT NULL DEFAULT false, received_ts BIGINT, UNIQUE (event_id) ); +CREATE INDEX redactions_redacts ON redactions (redacts); +CREATE TABLE room_aliases( room_alias TEXT NOT NULL, room_id TEXT NOT NULL, creator TEXT, UNIQUE (room_alias) ); +CREATE INDEX room_aliases_id ON room_aliases(room_id); +CREATE TABLE room_alias_servers( room_alias TEXT NOT NULL, server TEXT NOT NULL ); +CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias); +CREATE TABLE IF NOT EXISTS "server_keys_json" ( server_name TEXT NOT NULL, key_id TEXT NOT NULL, from_server TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, ts_valid_until_ms BIGINT NOT NULL, key_json bytea NOT NULL, CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server) ); +CREATE TABLE e2e_device_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) ); +CREATE TABLE e2e_one_time_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, algorithm TEXT NOT NULL, key_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) ); +CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) ); +CREATE INDEX user_threepids_user_id ON user_threepids(user_id); +CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value ) +/* event_search(event_id,room_id,sender,"key",value) */; +CREATE TABLE room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) ); +CREATE TABLE room_tags_revisions ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, stream_id BIGINT NOT NULL, instance_name TEXT, CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) ); +CREATE TABLE account_data( user_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type) ); +CREATE TABLE room_account_data( user_id TEXT NOT NULL, room_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type) ); +CREATE INDEX account_data_stream_id on account_data(user_id, stream_id); +CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id); +CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering); +CREATE TABLE event_push_actions( room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, profile_tag VARCHAR(32), actions TEXT NOT NULL, topological_ordering BIGINT, stream_ordering BIGINT, notif SMALLINT, highlight SMALLINT, unread SMALLINT, thread_id TEXT, CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) ); +CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id); +CREATE INDEX events_room_stream on events(room_id, stream_ordering); +CREATE INDEX public_room_index on rooms(is_public); +CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering ); +CREATE TABLE presence_stream( stream_id BIGINT, user_id TEXT, state TEXT, last_active_ts BIGINT, last_federation_update_ts BIGINT, last_user_sync_ts BIGINT, status_msg TEXT, currently_active BOOLEAN , instance_name TEXT); +CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id); +CREATE INDEX presence_stream_user_id ON presence_stream(user_id); +CREATE TABLE push_rules_stream( stream_id BIGINT NOT NULL, event_stream_ordering BIGINT NOT NULL, user_id TEXT NOT NULL, rule_id TEXT NOT NULL, op TEXT NOT NULL, priority_class SMALLINT, priority INTEGER, conditions TEXT, actions TEXT ); +CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id); +CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id); +CREATE TABLE ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, state_group BIGINT NOT NULL , instance_name TEXT); +CREATE TABLE threepid_guest_access_tokens( medium TEXT, address TEXT, guest_access_token TEXT, first_inviter TEXT ); +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address); +CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id ); +CREATE TABLE open_id_tokens ( token TEXT NOT NULL PRIMARY KEY, ts_valid_until_ms bigint NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) ); +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms); +CREATE TABLE pusher_throttle( pusher BIGINT NOT NULL, room_id TEXT NOT NULL, last_sent_ts BIGINT, throttle_ms BIGINT, PRIMARY KEY (pusher, room_id) ); +CREATE TABLE event_reports( id BIGINT NOT NULL PRIMARY KEY, received_ts BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, reason TEXT, content TEXT ); +CREATE TABLE appservice_stream_position( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT, CHECK (Lock='X') ); +CREATE TABLE device_inbox ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, stream_id BIGINT NOT NULL, message_json TEXT NOT NULL , instance_name TEXT); +CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id); +CREATE INDEX received_transactions_ts ON received_transactions(ts); +CREATE TABLE device_federation_outbox ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, queued_ts BIGINT NOT NULL, messages_json TEXT NOT NULL , instance_name TEXT); +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox(destination, stream_id); +CREATE TABLE device_federation_inbox ( origin TEXT NOT NULL, message_id TEXT NOT NULL, received_ts BIGINT NOT NULL , instance_name TEXT); +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox(origin, message_id); +CREATE TABLE stream_ordering_to_exterm ( stream_ordering BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL ); +CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm( stream_ordering ); +CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm( room_id, stream_ordering ); +CREATE TABLE IF NOT EXISTS "event_auth"( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE INDEX evauth_edges_id ON event_auth(event_id); +CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); +CREATE TABLE appservice_room_list( appservice_id TEXT NOT NULL, network_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list( appservice_id, network_id, room_id ); +CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id); +CREATE TABLE federation_stream_position( type TEXT NOT NULL, stream_id INTEGER NOT NULL , instance_name TEXT NOT NULL DEFAULT 'master'); +CREATE TABLE device_lists_remote_cache ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, content TEXT NOT NULL ); +CREATE TABLE device_lists_remote_extremeties ( user_id TEXT NOT NULL, stream_id TEXT NOT NULL ); +CREATE TABLE device_lists_stream ( stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL ); +CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id); +CREATE TABLE device_lists_outbound_pokes ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL, sent BOOLEAN NOT NULL, ts BIGINT NOT NULL , opentracing_context TEXT); +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id); +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id); +CREATE TABLE event_push_summary ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, notif_count BIGINT NOT NULL, stream_ordering BIGINT NOT NULL , unread_count BIGINT, last_receipt_stream_ordering BIGINT, thread_id TEXT); +CREATE TABLE event_push_summary_stream_ordering ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT NOT NULL, CHECK (Lock='X') ); +CREATE TABLE IF NOT EXISTS "pushers" ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, access_token BIGINT DEFAULT NULL, profile_tag TEXT NOT NULL, kind TEXT NOT NULL, app_id TEXT NOT NULL, app_display_name TEXT NOT NULL, device_display_name TEXT NOT NULL, pushkey TEXT NOT NULL, ts BIGINT NOT NULL, lang TEXT, data TEXT, last_stream_ordering INTEGER, last_success BIGINT, failing_since BIGINT, UNIQUE (app_id, pushkey, user_name) ); +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id); +CREATE TABLE ratelimit_override ( user_id TEXT NOT NULL, messages_per_second BIGINT, burst_count BIGINT ); +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id); +CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, prev_event_id TEXT , instance_name TEXT); +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); +CREATE TABLE user_directory_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') ); +CREATE VIRTUAL TABLE user_directory_search USING fts4 ( user_id, value ) +/* user_directory_search(user_id,value) */; +CREATE TABLE blocked_rooms ( room_id TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id); +CREATE TABLE IF NOT EXISTS "local_media_repository_url_cache"( url TEXT, response_code INTEGER, etag TEXT, expires_ts BIGINT, og TEXT, media_id TEXT, download_ts BIGINT ); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id); +CREATE TABLE IF NOT EXISTS "deleted_pushers" ( stream_id BIGINT NOT NULL, app_id TEXT NOT NULL, pushkey TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id); +CREATE TABLE IF NOT EXISTS "user_directory" ( user_id TEXT NOT NULL, room_id TEXT, display_name TEXT, avatar_url TEXT ); +CREATE INDEX user_directory_room_idx ON user_directory(room_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); +CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, actions TEXT NOT NULL, notif SMALLINT NOT NULL, highlight SMALLINT NOT NULL , unread SMALLINT, thread_id TEXT); +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id); +CREATE TABLE users_pending_deactivation ( user_id TEXT NOT NULL ); +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, timestamp BIGINT NOT NULL , user_agent TEXT); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); +CREATE TABLE erased_users ( user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id); +CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL ); +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); +CREATE TABLE IF NOT EXISTS "e2e_room_keys_versions" ( user_id TEXT NOT NULL, version BIGINT NOT NULL, algorithm TEXT NOT NULL, auth_data TEXT NOT NULL, deleted SMALLINT DEFAULT 0 NOT NULL , etag BIGINT); +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version); +CREATE TABLE IF NOT EXISTS "e2e_room_keys" ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, session_id TEXT NOT NULL, version BIGINT NOT NULL, first_message_index INT, forwarded_count INT, is_verified BOOLEAN, session_data TEXT NOT NULL ); +CREATE TABLE users_who_share_private_rooms ( user_id TEXT NOT NULL, other_user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); +CREATE TABLE user_threepid_id_server ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, id_server TEXT NOT NULL ); +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server( user_id, medium, address, id_server ); +CREATE TABLE users_in_public_rooms ( user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); +CREATE TABLE account_validity ( user_id TEXT PRIMARY KEY, expiration_ts_ms BIGINT NOT NULL, email_sent BOOLEAN NOT NULL, renewal_token TEXT , token_used_ts_ms BIGINT); +CREATE TABLE event_relations ( event_id TEXT NOT NULL, relates_to_id TEXT NOT NULL, relation_type TEXT NOT NULL, aggregation_key TEXT ); +CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id); +CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key); +CREATE TABLE room_stats_earliest_token ( room_id TEXT NOT NULL, token BIGINT NOT NULL ); +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id); +CREATE INDEX user_ips_device_id ON user_ips (user_id, device_id, last_seen); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream (user_id, device_id); +CREATE INDEX user_ips_last_seen ON user_ips (user_id, last_seen); +CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen); +CREATE INDEX users_creation_ts ON users (creation_ts); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group); +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id); +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id); +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip); +CREATE TABLE threepid_validation_session ( + session_id TEXT PRIMARY KEY, + medium TEXT NOT NULL, + address TEXT NOT NULL, + client_secret TEXT NOT NULL, + last_send_attempt BIGINT NOT NULL, + validated_at BIGINT +); +CREATE TABLE threepid_validation_token ( + token TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + next_link TEXT, + expires BIGINT NOT NULL +); +CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token(session_id); +CREATE TABLE event_expiry ( + event_id TEXT PRIMARY KEY, + expiry_ts BIGINT NOT NULL +); +CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry(expiry_ts); +CREATE TABLE event_labels ( + event_id TEXT, + label TEXT, + room_id TEXT NOT NULL, + topological_ordering BIGINT NOT NULL, + PRIMARY KEY(event_id, label) +); +CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering); +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id); +CREATE TABLE IF NOT EXISTS "devices" ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + display_name TEXT, + last_seen BIGINT, + ip TEXT, + user_agent TEXT, + hidden BOOLEAN DEFAULT 0, + CONSTRAINT device_uniqueness UNIQUE (user_id, device_id) +); +CREATE TABLE room_retention( + room_id TEXT, + event_id TEXT, + min_lifetime BIGINT, + max_lifetime BIGINT, + + PRIMARY KEY(room_id, event_id) +); +CREATE INDEX room_retention_max_lifetime_idx on room_retention(max_lifetime); +CREATE TABLE e2e_cross_signing_keys ( + user_id TEXT NOT NULL, + -- the type of cross-signing key (master, user_signing, or self_signing) + keytype TEXT NOT NULL, + -- the full key information, as a json-encoded dict + keydata TEXT NOT NULL, + -- for keeping the keys in order, so that we can fetch the latest one + stream_id BIGINT NOT NULL +); +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, stream_id); +CREATE TABLE e2e_cross_signing_signatures ( + -- user who did the signing + user_id TEXT NOT NULL, + -- key used to sign + key_id TEXT NOT NULL, + -- user who was signed + target_user_id TEXT NOT NULL, + -- device/key that was signed + target_device_id TEXT NOT NULL, + -- the actual signature + signature TEXT NOT NULL +); +CREATE TABLE user_signature_stream ( + -- uses the same stream ID as device list stream + stream_id BIGINT NOT NULL, + -- user who did the signing + from_user_id TEXT NOT NULL, + -- list of users who were signed, as a JSON array + user_ids TEXT NOT NULL +); +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id); +CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id); +CREATE TABLE stats_incremental_position ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); +CREATE TABLE room_stats_current ( + room_id TEXT NOT NULL PRIMARY KEY, + + -- These are absolute counts + current_state_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL, + + local_users_in_room INT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +, knocked_members INT); +CREATE TABLE user_stats_current ( + user_id TEXT NOT NULL PRIMARY KEY, + + joined_rooms BIGINT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +); +CREATE TABLE room_stats_state ( + room_id TEXT NOT NULL, + name TEXT, + canonical_alias TEXT, + join_rules TEXT, + history_visibility TEXT, + encryption TEXT, + avatar TEXT, + guest_access TEXT, + is_federatable BOOLEAN, + topic TEXT +, room_type TEXT); +CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id); +CREATE TABLE IF NOT EXISTS "user_filters" ( user_id TEXT NOT NULL, filter_id BIGINT NOT NULL, filter_json BYTEA NOT NULL ); +CREATE UNIQUE INDEX user_filters_unique ON "user_filters" (user_id, filter_id); +CREATE TABLE user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id); +CREATE TABLE device_lists_remote_resync ( + user_id TEXT NOT NULL, + added_ts BIGINT NOT NULL +); +CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync (user_id); +CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync (added_ts); +CREATE TABLE local_current_membership ( + room_id TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + membership TEXT NOT NULL + ); +CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership(user_id, room_id); +CREATE INDEX local_current_membership_room_idx ON local_current_membership(room_id); +CREATE TABLE ui_auth_sessions( + session_id TEXT NOT NULL, -- The session ID passed to the client. + creation_time BIGINT NOT NULL, -- The time this session was created (epoch time in milliseconds). + serverdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data added by Synapse. + clientdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data from the client. + uri TEXT NOT NULL, -- The URI the UI authentication session is using. + method TEXT NOT NULL, -- The HTTP method the UI authentication session is using. + -- The clientdict, uri, and method make up an tuple that must be immutable + -- throughout the lifetime of the UI Auth session. + description TEXT NOT NULL, -- A human readable description of the operation which caused the UI Auth flow to occur. + UNIQUE (session_id) +); +CREATE TABLE ui_auth_sessions_credentials( + session_id TEXT NOT NULL, -- The corresponding UI Auth session. + stage_type TEXT NOT NULL, -- The stage type. + result TEXT NOT NULL, -- The result of the stage verification, stored as JSON. + UNIQUE (session_id, stage_type), + FOREIGN KEY (session_id) + REFERENCES ui_auth_sessions (session_id) +); +CREATE TABLE IF NOT EXISTS "device_lists_outbound_last_success" ( destination TEXT NOT NULL, user_id TEXT NOT NULL, stream_id BIGINT NOT NULL ); +CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON "device_lists_outbound_last_success" (destination, user_id); +CREATE TABLE IF NOT EXISTS "local_media_repository_thumbnails" ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) ); +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id); +CREATE TABLE IF NOT EXISTS "remote_media_cache_thumbnails" ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) ); +CREATE TABLE ui_auth_sessions_ips( + session_id TEXT NOT NULL, + ip TEXT NOT NULL, + user_agent TEXT NOT NULL, + UNIQUE (session_id, ip, user_agent), + FOREIGN KEY (session_id) + REFERENCES ui_auth_sessions (session_id) +); +CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name); +CREATE TABLE dehydrated_devices( + user_id TEXT NOT NULL PRIMARY KEY, + device_id TEXT NOT NULL, + device_data TEXT NOT NULL -- JSON-encoded client-defined data +); +CREATE TABLE e2e_fallback_keys_json ( + user_id TEXT NOT NULL, -- The user this fallback key is for. + device_id TEXT NOT NULL, -- The device this fallback key is for. + algorithm TEXT NOT NULL, -- Which algorithm this fallback key is for. + key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads. + key_json TEXT NOT NULL, -- The key as a JSON blob. + used BOOLEAN NOT NULL DEFAULT FALSE, -- Whether the key has been used or not. + CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm) +); +CREATE TABLE destination_rooms ( + -- the destination in question. + destination TEXT NOT NULL REFERENCES destinations (destination), + -- the ID of the room in question + room_id TEXT NOT NULL REFERENCES rooms (room_id), + -- the stream_ordering of the event + stream_ordering BIGINT NOT NULL, + PRIMARY KEY (destination, room_id) + -- We don't declare a foreign key on stream_ordering here because that'd mean + -- we'd need to either maintain an index (expensive) or do a table scan of + -- destination_rooms whenever we delete an event (also potentially expensive). + -- In addition to that, a foreign key on stream_ordering would be redundant + -- as this row doesn't need to refer to a specific event; if the event gets + -- deleted then it doesn't affect the validity of the stream_ordering here. +); +CREATE INDEX destination_rooms_room_id + ON destination_rooms (room_id); +CREATE TABLE stream_positions ( + stream_name TEXT NOT NULL, + instance_name TEXT NOT NULL, + stream_id BIGINT NOT NULL +); +CREATE UNIQUE INDEX stream_positions_idx ON stream_positions(stream_name, instance_name); +CREATE TABLE IF NOT EXISTS "access_tokens" ( + id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT, + token TEXT NOT NULL, + valid_until_ms BIGINT, + puppets_user_id TEXT, + last_validated BIGINT, refresh_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, used BOOLEAN, + UNIQUE(token) +); +CREATE INDEX access_tokens_device_id ON access_tokens (user_id, device_id); +CREATE TABLE IF NOT EXISTS "event_txn_id" ( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + user_id TEXT NOT NULL, + token_id BIGINT NOT NULL, + txn_id TEXT NOT NULL, + inserted_ts BIGINT NOT NULL, + FOREIGN KEY (event_id) + REFERENCES events (event_id) ON DELETE CASCADE, + FOREIGN KEY (token_id) + REFERENCES access_tokens (id) ON DELETE CASCADE +); +CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id(event_id); +CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id(room_id, user_id, token_id, txn_id); +CREATE INDEX event_txn_id_ts ON event_txn_id(inserted_ts); +CREATE TABLE ignored_users( ignorer_user_id TEXT NOT NULL, ignored_user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id); +CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id); +CREATE TABLE event_auth_chains ( + event_id TEXT PRIMARY KEY, + chain_id BIGINT NOT NULL, + sequence_number BIGINT NOT NULL +); +CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains (chain_id, sequence_number); +CREATE TABLE event_auth_chain_links ( + origin_chain_id BIGINT NOT NULL, + origin_sequence_number BIGINT NOT NULL, + + target_chain_id BIGINT NOT NULL, + target_sequence_number BIGINT NOT NULL +); +CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id); +CREATE TABLE event_auth_chain_to_calculate ( + event_id TEXT PRIMARY KEY, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL +); +CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate(room_id); +CREATE TABLE users_to_send_full_presence_to( + -- The user ID to send full presence to. + user_id TEXT PRIMARY KEY, + -- A presence stream ID token - the current presence stream token when the row was last upserted. + -- If a user calls /sync and this token is part of the update they're to receive, we also include + -- full user presence in the response. + -- This allows multiple devices for a user to receive full presence whenever they next call /sync. + presence_stream_id BIGINT, + FOREIGN KEY (user_id) + REFERENCES users (name) +); +CREATE TABLE refresh_tokens ( + id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + token TEXT NOT NULL, + -- When consumed, a new refresh token is generated, which is tracked by + -- this foreign key + next_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, expiry_ts BIGINT DEFAULT NULL, ultimate_session_expiry_ts BIGINT DEFAULT NULL, + UNIQUE(token) +); +CREATE TABLE worker_locks ( + lock_name TEXT NOT NULL, + lock_key TEXT NOT NULL, + -- We write the instance name to ease manual debugging, we don't ever read + -- from it. + -- Note: instance names aren't guarenteed to be unique. + instance_name TEXT NOT NULL, + -- A random string generated each time an instance takes out a lock. Used by + -- the instance to tell whether the lock is still held by it (e.g. in the + -- case where the process stalls for a long time the lock may time out and + -- be taken out by another instance, at which point the original instance + -- can tell it no longer holds the lock as the tokens no longer match). + token TEXT NOT NULL, + last_renewed_ts BIGINT NOT NULL +); +CREATE UNIQUE INDEX worker_locks_key ON worker_locks (lock_name, lock_key); +CREATE TABLE federation_inbound_events_staging ( + origin TEXT NOT NULL, + room_id TEXT NOT NULL, + event_id TEXT NOT NULL, + received_ts BIGINT NOT NULL, + event_json TEXT NOT NULL, + internal_metadata TEXT NOT NULL +); +CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging(room_id, received_ts); +CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging(origin, event_id); +CREATE TABLE insertion_event_edges( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + insertion_prev_event_id TEXT NOT NULL +); +CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); +CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); +CREATE TABLE insertion_event_extremities( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL +); +CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities(event_id); +CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities(room_id); +CREATE TABLE registration_tokens( + token TEXT NOT NULL, -- The token that can be used for authentication. + uses_allowed INT, -- The total number of times this token can be used. NULL if no limit. + pending INT NOT NULL, -- The number of in progress registrations using this token. + completed INT NOT NULL, -- The number of times this token has been used to complete a registration. + expiry_time BIGINT, -- The latest time this token will be valid (epoch time in milliseconds). NULL if token doesn't expire. + UNIQUE (token) +); +CREATE TABLE sessions( + session_type TEXT NOT NULL, -- The unique key for this type of session. + session_id TEXT NOT NULL, -- The session ID passed to the client. + value TEXT NOT NULL, -- A JSON dictionary to persist. + expiry_time_ms BIGINT NOT NULL, -- The time this session will expire (epoch time in milliseconds). + UNIQUE (session_type, session_id) +); +CREATE TABLE insertion_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + next_batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events(event_id); +CREATE INDEX insertion_events_next_batch_id ON insertion_events(next_batch_id); +CREATE TABLE batch_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX batch_events_event_id ON batch_events(event_id); +CREATE INDEX batch_events_batch_id ON batch_events(batch_id); +CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges(event_id); +CREATE TABLE device_auth_providers ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + auth_provider_id TEXT NOT NULL, + auth_provider_session_id TEXT NOT NULL +); +CREATE INDEX device_auth_providers_devices + ON device_auth_providers (user_id, device_id); +CREATE INDEX device_auth_providers_sessions + ON device_auth_providers (auth_provider_id, auth_provider_session_id); +CREATE INDEX refresh_tokens_next_token_id + ON refresh_tokens(next_token_id) + WHERE next_token_id IS NOT NULL; +CREATE TABLE partial_state_rooms ( + room_id TEXT PRIMARY KEY, + FOREIGN KEY(room_id) REFERENCES rooms(room_id) +); +CREATE TABLE partial_state_rooms_servers ( + room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id), + server_name TEXT NOT NULL, + UNIQUE(room_id, server_name) +); +CREATE TABLE partial_state_events ( + -- the room_id is denormalised for efficient indexing (the canonical source is `events`) + room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id), + event_id TEXT NOT NULL REFERENCES events(event_id), + UNIQUE(event_id) +); +CREATE INDEX partial_state_events_room_id_idx + ON partial_state_events (room_id); +CREATE TRIGGER partial_state_events_bad_room_id + BEFORE INSERT ON partial_state_events + FOR EACH ROW + BEGIN + SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events') + WHERE EXISTS ( + SELECT 1 FROM events + WHERE events.event_id = NEW.event_id + AND events.room_id != NEW.room_id + ); + END; +CREATE TABLE device_lists_changes_in_room ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + room_id TEXT NOT NULL, + + -- This initially matches `device_lists_stream.stream_id`. Note that we + -- delete older values from `device_lists_stream`, so we can't use a foreign + -- constraint here. + -- + -- The table will contain rows with the same `stream_id` but different + -- `room_id`, as for each device update we store a row per room the user is + -- joined to. Therefore `(stream_id, room_id)` gives a unique index. + stream_id BIGINT NOT NULL, + + -- We have a background process which goes through this table and converts + -- entries into rows in `device_lists_outbound_pokes`. Once we have processed + -- a row, we mark it as such by setting `converted_to_destinations=TRUE`. + converted_to_destinations BOOLEAN NOT NULL, + opentracing_context TEXT +); +CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room(stream_id, room_id); +CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room(stream_id) WHERE NOT converted_to_destinations; +CREATE TABLE IF NOT EXISTS "event_edges" ( + event_id TEXT NOT NULL, + prev_event_id TEXT NOT NULL, + room_id TEXT NULL, + is_state BOOL NOT NULL DEFAULT 0, + FOREIGN KEY(event_id) REFERENCES events(event_id) +); +CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx + ON event_edges (event_id, prev_event_id); +CREATE INDEX ev_edges_prev_id ON event_edges (prev_event_id); +CREATE TABLE event_push_summary_last_receipt_stream_id ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); +CREATE TABLE IF NOT EXISTS "application_services_state" ( + as_id TEXT PRIMARY KEY NOT NULL, + state VARCHAR(5), + read_receipt_stream_id BIGINT, + presence_stream_id BIGINT, + to_device_stream_id BIGINT, + device_list_stream_id BIGINT +); +CREATE TABLE IF NOT EXISTS "receipts_linearized" ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + thread_id TEXT, + event_stream_ordering BIGINT, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); +CREATE TABLE IF NOT EXISTS "receipts_graph" ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_ids TEXT NOT NULL, + thread_id TEXT, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); +CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); +CREATE INDEX redactions_have_censored_ts ON redactions (received_ts) WHERE NOT have_censored; +CREATE INDEX room_memberships_user_room_forgotten ON room_memberships (user_id, room_id) WHERE forgotten = 1; +CREATE INDEX users_have_local_media ON local_media_repository (user_id, created_ts) ; +CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys (stream_id) ; +CREATE INDEX user_external_ids_user_id_idx ON user_external_ids (user_id) ; +CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream (state) WHERE state != 'offline'; +CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary (user_id, room_id) ; +CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ; +CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph (room_id, receipt_type, user_id) WHERE thread_id IS NULL; +CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized (room_id, receipt_type, user_id) WHERE thread_id IS NULL; +CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions (highlight, stream_ordering) WHERE highlight=0; +CREATE INDEX current_state_events_member_index ON current_state_events (state_key) WHERE type='m.room.member'; +CREATE INDEX event_contains_url_index ON events (room_id, topological_ordering, stream_ordering) WHERE contains_url = true AND outlier = false; +CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering) WHERE highlight=1; +CREATE INDEX local_media_repository_url_idx ON local_media_repository (created_ts) WHERE url_cache IS NOT NULL; +INSERT INTO appservice_stream_position VALUES('X',0); +INSERT INTO federation_stream_position VALUES('federation',-1,'master'); +INSERT INTO federation_stream_position VALUES('events',-1,'master'); +INSERT INTO event_push_summary_stream_ordering VALUES('X',0); +INSERT INTO user_directory_stream_pos VALUES('X',1); +INSERT INTO stats_incremental_position VALUES('X',1); +INSERT INTO event_push_summary_last_receipt_stream_id VALUES('X',0); diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.postgres b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres new file mode 100644 index 0000000000..263ade761e --- /dev/null +++ b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres @@ -0,0 +1,30 @@ +CREATE TABLE state_group_edges ( + state_group bigint NOT NULL, + prev_state_group bigint NOT NULL +); +CREATE SEQUENCE state_group_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE state_groups ( + id bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE state_groups_state ( + state_group bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text NOT NULL +); +ALTER TABLE ONLY state_groups_state ALTER COLUMN state_group SET (n_distinct=-0.02); +ALTER TABLE ONLY state_groups + ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id); +CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group); +CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges USING btree (state_group, prev_state_group); +CREATE INDEX state_groups_room_id_idx ON state_groups USING btree (room_id); +CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key); +SELECT pg_catalog.setval('state_group_id_seq', 1, false); diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite new file mode 100644 index 0000000000..dda060b638 --- /dev/null +++ b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite @@ -0,0 +1,20 @@ +CREATE TABLE state_groups ( + id BIGINT PRIMARY KEY, + room_id TEXT NOT NULL, + event_id TEXT NOT NULL +); +CREATE TABLE state_groups_state ( + state_group BIGINT NOT NULL, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL, + event_id TEXT NOT NULL +); +CREATE TABLE state_group_edges ( + state_group BIGINT NOT NULL, + prev_state_group BIGINT NOT NULL +); +CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group); +CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key); +CREATE INDEX state_groups_room_id_idx ON state_groups (room_id) ; +CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges (state_group, prev_state_group) ; -- cgit 1.5.1 From f5aaa55e2702af3cac1e195bf5d703970c24ff29 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 27 Sep 2022 17:26:35 +0100 Subject: Add new columns tracking when we partial-joined (#13892) --- changelog.d/13892.feature | 1 + synapse/handlers/federation.py | 14 +++++- synapse/storage/databases/main/room.py | 52 +++++++++++++++++++++- .../main/delta/73/04partial_join_details.sql | 23 ++++++++++ 4 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13892.feature create mode 100644 synapse/storage/schema/main/delta/73/04partial_join_details.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13892.feature b/changelog.d/13892.feature new file mode 100644 index 0000000000..df3f576536 --- /dev/null +++ b/changelog.d/13892.feature @@ -0,0 +1 @@ +Faster remote room joins: record _when_ we first partial-join to a room. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e1a4265a64..74580f60df 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -581,7 +581,11 @@ class FederationHandler: # Mark the room as having partial state. # The background process is responsible for unmarking this flag, # even if the join fails. - await self.store.store_partial_state_room(room_id, ret.servers_in_room) + await self.store.store_partial_state_room( + room_id=room_id, + servers=ret.servers_in_room, + device_lists_stream_id=self.store.get_device_stream_token(), + ) try: max_stream_id = ( @@ -606,6 +610,14 @@ class FederationHandler: room_id, ) raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) + else: + # Record the join event id for future use (when we finish the full + # join). We have to do this after persisting the event to keep foreign + # key constraints intact. + if ret.partial_state: + await self.store.write_partial_state_rooms_join_event_id( + room_id, event.event_id + ) finally: # Always kick off the background process that asynchronously fetches # state for the room. diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 5dd116d766..064c332fb7 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1777,28 +1777,46 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): self, room_id: str, servers: Collection[str], + device_lists_stream_id: int, ) -> None: - """Mark the given room as containing events with partial state + """Mark the given room as containing events with partial state. + + We also store additional data that describes _when_ we first partial-joined this + room, which helps us to keep other homeservers in sync when we finally fully + join this room. + + We do not include a `join_event_id` here---we need to wait for the join event + to be persisted first. Args: room_id: the ID of the room servers: other servers known to be in the room + device_lists_stream_id: the device_lists stream ID at the time when we first + joined the room. """ await self.db_pool.runInteraction( "store_partial_state_room", self._store_partial_state_room_txn, room_id, servers, + device_lists_stream_id, ) def _store_partial_state_room_txn( - self, txn: LoggingTransaction, room_id: str, servers: Collection[str] + self, + txn: LoggingTransaction, + room_id: str, + servers: Collection[str], + device_lists_stream_id: int, ) -> None: DatabasePool.simple_insert_txn( txn, table="partial_state_rooms", values={ "room_id": room_id, + "device_lists_stream_id": device_lists_stream_id, + # To be updated later once the join event is persisted. + "join_event_id": None, }, ) DatabasePool.simple_insert_many_txn( @@ -1809,6 +1827,36 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): ) self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) + async def write_partial_state_rooms_join_event_id( + self, + room_id: str, + join_event_id: str, + ) -> None: + """Record the join event which resulted from a partial join. + + We do this separately to `store_partial_state_room` because we need to wait for + the join event to be persisted. Otherwise we violate a foreign key constraint. + """ + await self.db_pool.runInteraction( + "write_partial_state_rooms_join_event_id", + self._write_partial_state_rooms_join_event_id, + room_id, + join_event_id, + ) + + def _write_partial_state_rooms_join_event_id( + self, + txn: LoggingTransaction, + room_id: str, + join_event_id: str, + ) -> None: + DatabasePool.simple_update_txn( + txn, + table="partial_state_rooms", + keyvalues={"room_id": room_id}, + updatevalues={"join_event_id": join_event_id}, + ) + async def maybe_store_room_on_outlier_membership( self, room_id: str, room_version: RoomVersion ) -> None: diff --git a/synapse/storage/schema/main/delta/73/04partial_join_details.sql b/synapse/storage/schema/main/delta/73/04partial_join_details.sql new file mode 100644 index 0000000000..5fb2bfe1a2 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/04partial_join_details.sql @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- To ensure we correctly notify other homeservers about device list changes from our +-- users after a partial join transitions to a full join, we need to know when we began +-- the partial join. For now it's sufficient to know the device_list stream_id at the +-- time of the partial join, and the join event created for us during a partial join. +-- +-- Both columns are backwards compatible. +ALTER TABLE partial_state_rooms ADD COLUMN device_lists_stream_id BIGINT NOT NULL DEFAULT 0; +ALTER TABLE partial_state_rooms ADD COLUMN join_event_id TEXT REFERENCES events(event_id); -- cgit 1.5.1 From 4b17a5ace846d82b09fccce79da77a8207a6765f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Sep 2022 14:42:43 +0100 Subject: Handle remote device list updates during partial join (#13913) c.f. #12993 (comment), point 3 This stores all device list updates that we receive while partial joins are ongoing, and processes them once we have the full state. Note: We don't actually process the device lists in the same ways as if we weren't partially joined. Instead of updating the device list remote cache, we simply notify local users that a change in the remote user's devices has happened. I think this is safe as if the local user requests the keys for the remote user and we don't have them we'll simply fetch them as normal. --- changelog.d/13913.misc | 1 + synapse/handlers/device.py | 62 ++++++++++++++++++++++ synapse/handlers/federation.py | 4 ++ synapse/storage/databases/main/devices.py | 55 +++++++++++++++++++ synapse/storage/databases/main/room.py | 20 +++++++ .../delta/73/04pending_device_list_updates.sql | 28 ++++++++++ 6 files changed, 170 insertions(+) create mode 100644 changelog.d/13913.misc create mode 100644 synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13913.misc b/changelog.d/13913.misc new file mode 100644 index 0000000000..30b4401049 --- /dev/null +++ b/changelog.d/13913.misc @@ -0,0 +1 @@ +Faster remote room joins: correctly handle remote device list updates during a partial join. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index bad262731c..f2ef591103 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -309,6 +309,17 @@ class DeviceWorkerHandler: "self_signing_key": self_signing_key, } + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + # TODO(faster_joins): worker mode support + # https://github.com/matrix-org/synapse/issues/12994 + logger.error( + "Trying handling device list state for partial join: not supported on workers." + ) + class DeviceHandler(DeviceWorkerHandler): def __init__(self, hs: "HomeServer"): @@ -746,6 +757,15 @@ class DeviceHandler(DeviceWorkerHandler): finally: self._handle_new_device_update_is_processing = False + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + # We defer to the device list updater implementation as we're on the + # right worker. + await self.device_list_updater.handle_room_un_partial_stated(room_id) + def _update_device_from_client_ips( device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] @@ -836,6 +856,16 @@ class DeviceListUpdater: ) return + # Check if we are partially joining any rooms. If so we need to store + # all device list updates so that we can handle them correctly once we + # know who is in the room. + partial_rooms = await self.store.get_partial_state_rooms_and_servers() + if partial_rooms: + await self.store.add_remote_device_list_to_pending( + user_id, + device_id, + ) + room_ids = await self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we @@ -1175,3 +1205,35 @@ class DeviceListUpdater: device_ids.append(verify_key.version) return device_ids + + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + pending_updates = ( + await self.store.get_pending_remote_device_list_updates_for_room(room_id) + ) + + for user_id, device_id in pending_updates: + logger.info( + "Got pending device list update in room %s: %s / %s", + room_id, + user_id, + device_id, + ) + position = await self.store.add_device_change_to_streams( + user_id, + [device_id], + room_ids=[room_id], + ) + + if not position: + # This should only happen if there are no updates, which + # shouldn't happen when we've passed in a non-empty set of + # device IDs. + continue + + self.device_handler.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, position, rooms=[room_id] + ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8f847ff845..360ab6fee2 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -149,6 +149,7 @@ class FederationHandler: self.http_client = hs.get_proxied_blacklisted_http_client() self._replication = hs.get_replication_data_handler() self._federation_event_handler = hs.get_federation_event_handler() + self._device_handler = hs.get_device_handler() self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client( @@ -1631,6 +1632,9 @@ class FederationHandler: # https://github.com/matrix-org/synapse/issues/12994 await self.state_handler.update_current_state(room_id) + logger.info("Handling any pending device list updates") + await self._device_handler.handle_room_un_partial_stated(room_id) + logger.info("Clearing partial-state flag for %s", room_id) success = await self.store.clear_partial_state_room(room_id) if success: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 1151fb0cc3..1e562d4a40 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1995,3 +1995,58 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): add_device_list_outbound_pokes_txn, stream_ids, ) + + async def add_remote_device_list_to_pending( + self, user_id: str, device_id: str + ) -> None: + """Add a device list update to the table tracking remote device list + updates during partial joins. + """ + + async with self._device_list_id_gen.get_next() as stream_id: # type: ignore[attr-defined] + await self.db_pool.simple_upsert( + table="device_lists_remote_pending", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={"stream_id": stream_id}, + desc="add_remote_device_list_to_pending", + ) + + async def get_pending_remote_device_list_updates_for_room( + self, room_id: str + ) -> Collection[Tuple[str, str]]: + """Get the set of remote device list updates from the pending table for + the room. + """ + + min_device_stream_id = await self.db_pool.simple_select_one_onecol( + table="partial_state_rooms", + keyvalues={ + "room_id": room_id, + }, + retcol="device_lists_stream_id", + desc="get_pending_remote_device_list_updates_for_room_device", + ) + + sql = """ + SELECT user_id, device_id FROM device_lists_remote_pending AS d + INNER JOIN current_state_events AS c ON + type = 'm.room.member' + AND state_key = user_id + AND membership = 'join' + WHERE + room_id = ? AND stream_id > ? + """ + + def get_pending_remote_device_list_updates_for_room_txn( + txn: LoggingTransaction, + ) -> Collection[Tuple[str, str]]: + txn.execute(sql, (room_id, min_device_stream_id)) + return cast(Collection[Tuple[str, str]], txn.fetchall()) + + return await self.db_pool.runInteraction( + "get_pending_remote_device_list_updates_for_room", + get_pending_remote_device_list_updates_for_room_txn, + ) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 064c332fb7..672c9a03fc 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1217,6 +1217,26 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): ) self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) + # We now delete anything from `device_lists_remote_pending` with a + # stream ID less than the minimum + # `partial_state_rooms.device_lists_stream_id`, as we no longer need them. + device_lists_stream_id = DatabasePool.simple_select_one_onecol_txn( + txn, + table="partial_state_rooms", + keyvalues={}, + retcol="MIN(device_lists_stream_id)", + allow_none=True, + ) + if device_lists_stream_id is None: + # There are no rooms being currently partially joined, so we delete everything. + txn.execute("DELETE FROM device_lists_remote_pending") + else: + sql = """ + DELETE FROM device_lists_remote_pending + WHERE stream_id <= ? + """ + txn.execute(sql, (device_lists_stream_id,)) + @cached() async def is_partial_state_room(self, room_id: str) -> bool: """Checks if this room has partial state. diff --git a/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql new file mode 100644 index 0000000000..dbd78d677d --- /dev/null +++ b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql @@ -0,0 +1,28 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Stores remote device lists we have received for remote users while a partial +-- join is in progress. +-- +-- This allows us to replay any device list updates if it turns out the remote +-- user was in the partially joined room +CREATE TABLE device_lists_remote_pending( + stream_id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT NOT NULL +); + +-- We only keep the most recent update for a given user/device pair. +CREATE UNIQUE INDEX device_lists_remote_pending_user_device_id ON device_lists_remote_pending(user_id, device_id); -- cgit 1.5.1 From be76cd8200b18f3c68b895f85ac7ef5b0ddc2466 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 29 Sep 2022 14:23:24 +0100 Subject: Allow admins to require a manual approval process before new accounts can be used (using MSC3866) (#13556) --- changelog.d/13556.feature | 1 + synapse/_scripts/synapse_port_db.py | 2 +- synapse/api/constants.py | 11 ++ synapse/api/errors.py | 16 ++ synapse/config/experimental.py | 19 +++ synapse/handlers/admin.py | 5 + synapse/handlers/auth.py | 11 ++ synapse/handlers/register.py | 8 + synapse/replication/http/register.py | 5 + synapse/rest/admin/users.py | 43 ++++- synapse/rest/client/login.py | 37 +++- synapse/rest/client/register.py | 22 ++- synapse/storage/databases/main/__init__.py | 9 +- synapse/storage/databases/main/registration.py | 150 +++++++++++++++-- .../main/delta/73/03users_approved_column.sql | 20 +++ tests/rest/admin/test_user.py | 186 ++++++++++++++++++++- tests/rest/client/test_auth.py | 33 +++- tests/rest/client/test_login.py | 41 +++++ tests/rest/client/test_register.py | 32 +++- tests/rest/client/utils.py | 12 +- tests/storage/test_registration.py | 102 ++++++++++- 21 files changed, 731 insertions(+), 34 deletions(-) create mode 100644 changelog.d/13556.feature create mode 100644 synapse/storage/schema/main/delta/73/03users_approved_column.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13556.feature b/changelog.d/13556.feature new file mode 100644 index 0000000000..f9d63db6c0 --- /dev/null +++ b/changelog.d/13556.feature @@ -0,0 +1 @@ +Allow server admins to require a manual approval process before new accounts can be used (using [MSC3866](https://github.com/matrix-org/matrix-spec-proposals/pull/3866)). diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 450ba462ba..5fa599e70e 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -107,7 +107,7 @@ BOOLEAN_COLUMNS = { "redactions": ["have_censored"], "room_stats_state": ["is_federatable"], "local_media_repository": ["safe_from_quarantine"], - "users": ["shadow_banned"], + "users": ["shadow_banned", "approved"], "e2e_fallback_keys_json": ["used"], "access_tokens": ["used"], "device_lists_changes_in_room": ["converted_to_destinations"], diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c178ddf070..c031903b1a 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -269,3 +269,14 @@ class PublicRoomsFilterFields: GENERIC_SEARCH_TERM: Final = "generic_search_term" ROOM_TYPES: Final = "room_types" + + +class ApprovalNoticeMedium: + """Identifier for the medium this server will use to serve notice of approval for a + specific user's registration. + + As defined in https://github.com/matrix-org/matrix-spec-proposals/blob/babolivier/m_not_approved/proposals/3866-user-not-approved-error.md + """ + + NONE = "org.matrix.msc3866.none" + EMAIL = "org.matrix.msc3866.email" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 1c6b53aa24..c606207569 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -106,6 +106,8 @@ class Codes(str, Enum): # Part of MSC3895. UNABLE_DUE_TO_PARTIAL_STATE = "ORG.MATRIX.MSC3895_UNABLE_DUE_TO_PARTIAL_STATE" + USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL" + class CodeMessageException(RuntimeError): """An exception with integer code and message string attributes. @@ -566,6 +568,20 @@ class UnredactedContentDeletedError(SynapseError): return cs_error(self.msg, self.errcode, **extra) +class NotApprovedError(SynapseError): + def __init__( + self, + msg: str, + approval_notice_medium: str, + ): + super().__init__( + code=403, + msg=msg, + errcode=Codes.USER_AWAITING_APPROVAL, + additional_fields={"approval_notice_medium": approval_notice_medium}, + ) + + def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict": """Utility method for constructing an error response for client-server interactions. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 933779c23a..31834fb27d 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -14,10 +14,25 @@ from typing import Any +import attr + from synapse.config._base import Config from synapse.types import JsonDict +@attr.s(auto_attribs=True, frozen=True, slots=True) +class MSC3866Config: + """Configuration for MSC3866 (mandating approval for new users)""" + + # Whether the base support for the approval process is enabled. This includes the + # ability for administrators to check and update the approval of users, even if no + # approval is currently required. + enabled: bool = False + # Whether to require that new users are approved by an admin before their account + # can be used. Note that this setting is ignored if 'enabled' is false. + require_approval_for_new_accounts: bool = False + + class ExperimentalConfig(Config): """Config section for enabling experimental features""" @@ -97,6 +112,10 @@ class ExperimentalConfig(Config): # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) + # MSC3866: M_USER_AWAITING_APPROVAL error code + raw_msc3866_config = experimental.get("msc3866", {}) + self.msc3866 = MSC3866Config(**raw_msc3866_config) + # MSC3881: Remotely toggle push notifications for another client self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index cf9f19608a..f2989cc4a2 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -32,6 +32,7 @@ class AdminHandler: self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def get_whois(self, user: UserID) -> JsonDict: connections = [] @@ -75,6 +76,10 @@ class AdminHandler: "is_guest", } + if self._msc3866_enabled: + # Only include the approved flag if support for MSC3866 is enabled. + user_info_to_return.add("approved") + # Restrict returned keys to a known set. user_info_dict = { key: value diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index eacd631ee0..f5f0e0e7a7 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1009,6 +1009,17 @@ class AuthHandler: return res[0] return None + async def is_user_approved(self, user_id: str) -> bool: + """Checks if a user is approved and therefore can be allowed to log in. + + Args: + user_id: the user to check the approval status of. + + Returns: + A boolean that is True if the user is approved, False otherwise. + """ + return await self.store.is_user_approved(user_id) + async def _find_user_id_and_pwd_hash( self, user_id: str ) -> Optional[Tuple[str, str]]: diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index cfcadb34db..ca1c7a1866 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -220,6 +220,7 @@ class RegistrationHandler: by_admin: bool = False, user_agent_ips: Optional[List[Tuple[str, str]]] = None, auth_provider_id: Optional[str] = None, + approved: bool = False, ) -> str: """Registers a new client on the server. @@ -246,6 +247,8 @@ class RegistrationHandler: user_agent_ips: Tuples of user-agents and IP addresses used during the registration process. auth_provider_id: The SSO IdP the user used, if any. + approved: True if the new user should be considered already + approved by an administrator. Returns: The registered user_id. Raises: @@ -307,6 +310,7 @@ class RegistrationHandler: user_type=user_type, address=address, shadow_banned=shadow_banned, + approved=approved, ) profile = await self.store.get_profileinfo(localpart) @@ -695,6 +699,7 @@ class RegistrationHandler: user_type: Optional[str] = None, address: Optional[str] = None, shadow_banned: bool = False, + approved: bool = False, ) -> None: """Register user in the datastore. @@ -713,6 +718,7 @@ class RegistrationHandler: api.constants.UserTypes, or None for a normal user. address: the IP address used to perform the registration. shadow_banned: Whether to shadow-ban the user + approved: Whether to mark the user as approved by an administrator """ if self.hs.config.worker.worker_app: await self._register_client( @@ -726,6 +732,7 @@ class RegistrationHandler: user_type=user_type, address=address, shadow_banned=shadow_banned, + approved=approved, ) else: await self.store.register_user( @@ -738,6 +745,7 @@ class RegistrationHandler: admin=admin, user_type=user_type, shadow_banned=shadow_banned, + approved=approved, ) # Only call the account validity module(s) on the main process, to avoid diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 6c8f8388fd..61abb529c8 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -51,6 +51,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): user_type: Optional[str], address: Optional[str], shadow_banned: bool, + approved: bool, ) -> JsonDict: """ Args: @@ -68,6 +69,8 @@ class ReplicationRegisterServlet(ReplicationEndpoint): or None for a normal user. address: the IP address used to perform the regitration. shadow_banned: Whether to shadow-ban the user + approved: Whether the user should be considered already approved by an + administrator. """ return { "password_hash": password_hash, @@ -79,6 +82,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "user_type": user_type, "address": address, "shadow_banned": shadow_banned, + "approved": approved, } async def _handle_request( # type: ignore[override] @@ -99,6 +103,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): user_type=content["user_type"], address=content["address"], shadow_banned=content["shadow_banned"], + approved=content["approved"], ) return 200, {} diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 1274773d7e..15ac2059aa 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -69,6 +69,7 @@ class UsersRestServletV2(RestServlet): self.store = hs.get_datastores().main self.auth = hs.get_auth() self.admin_handler = hs.get_admin_handler() + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) @@ -95,6 +96,13 @@ class UsersRestServletV2(RestServlet): guests = parse_boolean(request, "guests", default=True) deactivated = parse_boolean(request, "deactivated", default=False) + # If support for MSC3866 is not enabled, apply no filtering based on the + # `approved` column. + if self._msc3866_enabled: + approved = parse_boolean(request, "approved", default=True) + else: + approved = True + order_by = parse_string( request, "order_by", @@ -115,8 +123,22 @@ class UsersRestServletV2(RestServlet): direction = parse_string(request, "dir", default="f", allowed_values=("f", "b")) users, total = await self.store.get_users_paginate( - start, limit, user_id, name, guests, deactivated, order_by, direction + start, + limit, + user_id, + name, + guests, + deactivated, + order_by, + direction, + approved, ) + + # If support for MSC3866 is not enabled, don't show the approval flag. + if not self._msc3866_enabled: + for user in users: + del user["approved"] + ret = {"users": users, "total": total} if (start + limit) < total: ret["next_token"] = str(start + len(users)) @@ -163,6 +185,7 @@ class UserRestServletV2(RestServlet): self.deactivate_account_handler = hs.get_deactivate_account_handler() self.registration_handler = hs.get_registration_handler() self.pusher_pool = hs.get_pusherpool() + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def on_GET( self, request: SynapseRequest, user_id: str @@ -239,6 +262,15 @@ class UserRestServletV2(RestServlet): HTTPStatus.BAD_REQUEST, "'deactivated' parameter is not of type boolean" ) + approved: Optional[bool] = None + if "approved" in body and self._msc3866_enabled: + approved = body["approved"] + if not isinstance(approved, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "'approved' parameter is not of type boolean", + ) + # convert List[Dict[str, str]] into List[Tuple[str, str]] if external_ids is not None: new_external_ids = [ @@ -343,6 +375,9 @@ class UserRestServletV2(RestServlet): if "user_type" in body: await self.store.set_user_type(target_user, user_type) + if approved is not None: + await self.store.update_user_approval_status(target_user, approved) + user = await self.admin_handler.get_user(target_user) assert user is not None @@ -355,6 +390,10 @@ class UserRestServletV2(RestServlet): if password is not None: password_hash = await self.auth_handler.hash(password) + new_user_approved = True + if self._msc3866_enabled and approved is not None: + new_user_approved = approved + user_id = await self.registration_handler.register_user( localpart=target_user.localpart, password_hash=password_hash, @@ -362,6 +401,7 @@ class UserRestServletV2(RestServlet): default_display_name=displayname, user_type=user_type, by_admin=True, + approved=new_user_approved, ) if threepids is not None: @@ -550,6 +590,7 @@ class UserRegisterServlet(RestServlet): user_type=user_type, default_display_name=displayname, by_admin=True, + approved=True, ) result = await register._create_registration_details(user_id, body) diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 0437c87d8d..f554586ac3 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -28,7 +28,14 @@ from typing import ( from typing_extensions import TypedDict -from synapse.api.errors import Codes, InvalidClientTokenError, LoginError, SynapseError +from synapse.api.constants import ApprovalNoticeMedium +from synapse.api.errors import ( + Codes, + InvalidClientTokenError, + LoginError, + NotApprovedError, + SynapseError, +) from synapse.api.ratelimiting import Ratelimiter from synapse.api.urls import CLIENT_API_PREFIX from synapse.appservice import ApplicationService @@ -55,11 +62,11 @@ logger = logging.getLogger(__name__) class LoginResponse(TypedDict, total=False): user_id: str - access_token: str + access_token: Optional[str] home_server: str expires_in_ms: Optional[int] refresh_token: Optional[str] - device_id: str + device_id: Optional[str] well_known: Optional[Dict[str, Any]] @@ -92,6 +99,12 @@ class LoginRestServlet(RestServlet): hs.config.registration.refreshable_access_token_lifetime is not None ) + # Whether we need to check if the user has been approved or not. + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + self.auth = hs.get_auth() self.clock = hs.get_clock() @@ -220,6 +233,14 @@ class LoginRestServlet(RestServlet): except KeyError: raise SynapseError(400, "Missing JSON keys.") + if self._require_approval: + approved = await self.auth_handler.is_user_approved(result["user_id"]) + if not approved: + raise NotApprovedError( + msg="This account is pending approval by a server administrator.", + approval_notice_medium=ApprovalNoticeMedium.NONE, + ) + well_known_data = self._well_known_builder.get_well_known() if well_known_data: result["well_known"] = well_known_data @@ -356,6 +377,16 @@ class LoginRestServlet(RestServlet): errcode=Codes.INVALID_PARAM, ) + if self._require_approval: + approved = await self.auth_handler.is_user_approved(user_id) + if not approved: + # If the user isn't approved (and needs to be) we won't allow them to + # actually log in, so we don't want to create a device/access token. + return LoginResponse( + user_id=user_id, + home_server=self.hs.hostname, + ) + initial_display_name = login_submission.get("initial_device_display_name") ( device_id, diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 20bab20c8f..de810ae3ec 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -21,10 +21,15 @@ from twisted.web.server import Request import synapse import synapse.api.auth import synapse.types -from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType +from synapse.api.constants import ( + APP_SERVICE_REGISTRATION_TYPE, + ApprovalNoticeMedium, + LoginType, +) from synapse.api.errors import ( Codes, InteractiveAuthIncompleteError, + NotApprovedError, SynapseError, ThreepidValidationError, UnrecognizedRequestError, @@ -414,6 +419,11 @@ class RegisterRestServlet(RestServlet): hs.config.registration.inhibit_user_in_use_error ) + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + self._registration_flows = _calculate_registration_flows( hs.config, self.auth_handler ) @@ -734,6 +744,12 @@ class RegisterRestServlet(RestServlet): access_token=return_dict.get("access_token"), ) + if self._require_approval: + raise NotApprovedError( + msg="This account needs to be approved by an administrator before it can be used.", + approval_notice_medium=ApprovalNoticeMedium.NONE, + ) + return 200, return_dict async def _do_appservice_registration( @@ -778,7 +794,9 @@ class RegisterRestServlet(RestServlet): "user_id": user_id, "home_server": self.hs.hostname, } - if not params.get("inhibit_login", False): + # We don't want to log the user in if we're going to deny them access because + # they need to be approved first. + if not params.get("inhibit_login", False) and not self._require_approval: device_id = params.get("device_id") initial_display_name = params.get("initial_device_display_name") ( diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 0843f10340..a62b4abd4e 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -203,6 +203,7 @@ class DataStore( deactivated: bool = False, order_by: str = UserSortOrder.USER_ID.value, direction: str = "f", + approved: bool = True, ) -> Tuple[List[JsonDict], int]: """Function to retrieve a paginated list of users from users list. This will return a json list of users and the @@ -217,6 +218,7 @@ class DataStore( deactivated: whether to include deactivated users order_by: the sort order of the returned list direction: sort ascending or descending + approved: whether to include approved users Returns: A tuple of a list of mappings from user to information and a count of total users. """ @@ -249,6 +251,11 @@ class DataStore( if not deactivated: filters.append("deactivated = 0") + if not approved: + # We ignore NULL values for the approved flag because these should only + # be already existing users that we consider as already approved. + filters.append("approved IS FALSE") + where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" sql_base = f""" @@ -262,7 +269,7 @@ class DataStore( sql = f""" SELECT name, user_type, is_guest, admin, deactivated, shadow_banned, - displayname, avatar_url, creation_ts * 1000 as creation_ts + displayname, avatar_url, creation_ts * 1000 as creation_ts, approved {sql_base} ORDER BY {order_by_column} {order}, u.name ASC LIMIT ? OFFSET ? diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index ac821878b0..2996d6bb4d 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -166,27 +166,49 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): @cached() async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]: """Deprecated: use get_userinfo_by_id instead""" - return await self.db_pool.simple_select_one( - table="users", - keyvalues={"name": user_id}, - retcols=[ - "name", - "password_hash", - "is_guest", - "admin", - "consent_version", - "consent_ts", - "consent_server_notice_sent", - "appservice_id", - "creation_ts", - "user_type", - "deactivated", - "shadow_banned", - ], - allow_none=True, + + def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: + # We could technically use simple_select_one here, but it would not perform + # the COALESCEs (unless hacked into the column names), which could yield + # confusing results. + txn.execute( + """ + SELECT + name, password_hash, is_guest, admin, consent_version, consent_ts, + consent_server_notice_sent, appservice_id, creation_ts, user_type, + deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned, + COALESCE(approved, TRUE) AS approved + FROM users + WHERE name = ? + """, + (user_id,), + ) + + rows = self.db_pool.cursor_to_dict(txn) + + if len(rows) == 0: + return None + + return rows[0] + + row = await self.db_pool.runInteraction( desc="get_user_by_id", + func=get_user_by_id_txn, ) + if row is not None: + # If we're using SQLite our boolean values will be integers. Because we + # present some of this data as is to e.g. server admins via REST APIs, we + # want to make sure we're returning the right type of data. + # Note: when adding a column name to this list, be wary of NULLable columns, + # since NULL values will be turned into False. + boolean_columns = ["admin", "deactivated", "shadow_banned", "approved"] + for column in boolean_columns: + if not isinstance(row[column], bool): + row[column] = bool(row[column]) + + return row + async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]: """Get a UserInfo object for a user by user ID. @@ -1779,6 +1801,40 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): return res if res else False + @cached() + async def is_user_approved(self, user_id: str) -> bool: + """Checks if a user is approved and therefore can be allowed to log in. + + If the user's 'approved' column is NULL, we consider it as true given it means + the user was registered when support for an approval flow was either disabled + or nonexistent. + + Args: + user_id: the user to check the approval status of. + + Returns: + A boolean that is True if the user is approved, False otherwise. + """ + + def is_user_approved_txn(txn: LoggingTransaction) -> bool: + txn.execute( + """ + SELECT COALESCE(approved, TRUE) AS approved FROM users WHERE name = ? + """, + (user_id,), + ) + + rows = self.db_pool.cursor_to_dict(txn) + + # We cast to bool because the value returned by the database engine might + # be an integer if we're using SQLite. + return bool(rows[0]["approved"]) + + return await self.db_pool.runInteraction( + desc="is_user_pending_approval", + func=is_user_approved_txn, + ) + class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): def __init__( @@ -1916,6 +1972,29 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def update_user_approval_status_txn( + self, txn: LoggingTransaction, user_id: str, approved: bool + ) -> None: + """Set the user's 'approved' flag to the given value. + + The boolean is turned into an int because the column is a smallint. + + Args: + txn: the current database transaction. + user_id: the user to update the flag for. + approved: the value to set the flag to. + """ + self.db_pool.simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"approved": approved}, + ) + + # Invalidate the caches of methods that read the value of the 'approved' flag. + self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) + self._invalidate_cache_and_stream(txn, self.is_user_approved, (user_id,)) + class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): def __init__( @@ -1933,6 +2012,13 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._refresh_tokens_id_gen = IdGenerator(db_conn, "refresh_tokens", "id") + # If support for MSC3866 is enabled and configured to require approval for new + # account, we will create new users with an 'approved' flag set to false. + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + async def add_access_token_to_user( self, user_id: str, @@ -2065,6 +2151,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): admin: bool = False, user_type: Optional[str] = None, shadow_banned: bool = False, + approved: bool = False, ) -> None: """Attempts to register an account. @@ -2083,6 +2170,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): or None for a normal user. shadow_banned: Whether the user is shadow-banned, i.e. they may be told their requests succeeded but we ignore them. + approved: Whether to consider the user has already been approved by an + administrator. Raises: StoreError if the user_id could not be registered. @@ -2099,6 +2188,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): admin, user_type, shadow_banned, + approved, ) def _register_user( @@ -2113,11 +2203,14 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): admin: bool, user_type: Optional[str], shadow_banned: bool, + approved: bool, ) -> None: user_id_obj = UserID.from_string(user_id) now = int(self._clock.time()) + user_approved = approved or not self._require_approval + try: if was_guest: # Ensure that the guest user actually exists @@ -2143,6 +2236,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): "admin": 1 if admin else 0, "user_type": user_type, "shadow_banned": shadow_banned, + "approved": user_approved, }, ) else: @@ -2158,6 +2252,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): "admin": 1 if admin else 0, "user_type": user_type, "shadow_banned": shadow_banned, + "approved": user_approved, }, ) @@ -2503,6 +2598,25 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): start_or_continue_validation_session_txn, ) + async def update_user_approval_status( + self, user_id: UserID, approved: bool + ) -> None: + """Set the user's 'approved' flag to the given value. + + The boolean will be turned into an int (in update_user_approval_status_txn) + because the column is a smallint. + + Args: + user_id: the user to update the flag for. + approved: the value to set the flag to. + """ + await self.db_pool.runInteraction( + "update_user_approval_status", + self.update_user_approval_status_txn, + user_id.to_string(), + approved, + ) + def find_max_generated_user_id_localpart(cur: Cursor) -> int: """ diff --git a/synapse/storage/schema/main/delta/73/03users_approved_column.sql b/synapse/storage/schema/main/delta/73/03users_approved_column.sql new file mode 100644 index 0000000000..5328d592ea --- /dev/null +++ b/synapse/storage/schema/main/delta/73/03users_approved_column.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column to the users table to track whether the user needs to be approved by an +-- administrator. +-- A NULL column means the user was created before this feature was supported by Synapse, +-- and should be considered as TRUE. +ALTER TABLE users ADD COLUMN approved BOOLEAN; diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 1847e6ad6b..4c1ce33463 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -25,10 +25,10 @@ from parameterized import parameterized, parameterized_class from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import UserTypes +from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError from synapse.api.room_versions import RoomVersions -from synapse.rest.client import devices, login, logout, profile, room, sync +from synapse.rest.client import devices, login, logout, profile, register, room, sync from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.server import HomeServer from synapse.types import JsonDict, UserID @@ -578,6 +578,16 @@ class UsersListTestCase(unittest.HomeserverTestCase): _search_test(None, "foo", "user_id") _search_test(None, "bar", "user_id") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) def test_invalid_parameter(self) -> None: """ If parameters are invalid, an error is returned. @@ -623,6 +633,16 @@ class UsersListTestCase(unittest.HomeserverTestCase): self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + # invalid approved + channel = self.make_request( + "GET", + self.url + "?approved=not_bool", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + # unkown order_by channel = self.make_request( "GET", @@ -841,6 +861,69 @@ class UsersListTestCase(unittest.HomeserverTestCase): self._order_test([self.admin_user, user1, user2], "creation_ts", "f") self._order_test([user2, user1, self.admin_user], "creation_ts", "b") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_filter_out_approved(self) -> None: + """Tests that the endpoint can filter out approved users.""" + # Create our users. + self._create_users(2) + + # Get the list of users. + channel = self.make_request( + "GET", + self.url, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + # Exclude the admin, because we don't want to accidentally un-approve the admin. + non_admin_user_ids = [ + user["name"] + for user in channel.json_body["users"] + if user["name"] != self.admin_user + ] + + self.assertEqual(2, len(non_admin_user_ids), non_admin_user_ids) + + # Select a user and un-approve them. We do this rather than the other way around + # because, since these users are created by an admin, we consider them already + # approved. + not_approved_user = non_admin_user_ids[0] + + channel = self.make_request( + "PUT", + f"/_synapse/admin/v2/users/{not_approved_user}", + {"approved": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + # Now get the list of users again, this time filtering out approved users. + channel = self.make_request( + "GET", + self.url + "?approved=false", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + non_admin_user_ids = [ + user["name"] + for user in channel.json_body["users"] + if user["name"] != self.admin_user + ] + + # We should only have our unapproved user now. + self.assertEqual(1, len(non_admin_user_ids), non_admin_user_ids) + self.assertEqual(not_approved_user, non_admin_user_ids[0]) + def _order_test( self, expected_user_list: List[str], @@ -1272,6 +1355,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): synapse.rest.admin.register_servlets, login.register_servlets, sync.register_servlets, + register.register_servlets, ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: @@ -2536,6 +2620,104 @@ class UserRestTestCase(unittest.HomeserverTestCase): # Ensure they're still alive self.assertEqual(0, channel.json_body["deactivated"]) + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_approve_account(self) -> None: + """Tests that approving an account correctly sets the approved flag for the user.""" + url = self.url_prefix % "@bob:test" + + # Create the user using the client-server API since otherwise the user will be + # marked as approved automatically. + channel = self.make_request( + "POST", + "register", + { + "username": "bob", + "password": "test", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + + # Get user + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(False, channel.json_body["approved"]) + + # Approve user + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content={"approved": True}, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(True, channel.json_body["approved"]) + + # Check that the user is now approved + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(True, channel.json_body["approved"]) + + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_register_approved(self) -> None: + url = self.url_prefix % "@bob:test" + + # Create user + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content={"password": "abc123", "approved": True}, + ) + + self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual(1, channel.json_body["approved"]) + + # Get user + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual(1, channel.json_body["approved"]) + def _is_erased(self, user_id: str, expect: bool) -> None: """Assert that the user is erased or not""" d = self.store.is_user_erased(user_id) diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 05355c7fb6..090cef5216 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -20,7 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor from twisted.web.resource import Resource import synapse.rest.admin -from synapse.api.constants import LoginType +from synapse.api.constants import ApprovalNoticeMedium, LoginType +from synapse.api.errors import Codes from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.rest.client import account, auth, devices, login, logout, register from synapse.rest.synapse.client import build_synapse_client_resource_tree @@ -567,6 +568,36 @@ class UIAuthTests(unittest.HomeserverTestCase): body={"auth": {"session": session_id}}, ) + @skip_unless(HAS_OIDC, "requires OIDC") + @override_config( + { + "oidc_config": TEST_OIDC_CONFIG, + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + }, + } + ) + def test_sso_not_approved(self) -> None: + """Tests that if we register a user via SSO while requiring approval for new + accounts, we still raise the correct error before logging the user in. + """ + login_resp = self.helper.login_via_oidc("username", expected_status=403) + + self.assertEqual(login_resp["errcode"], Codes.USER_AWAITING_APPROVAL) + self.assertEqual( + ApprovalNoticeMedium.NONE, login_resp["approval_notice_medium"] + ) + + # Check that we didn't register a device for the user during the login attempt. + devices = self.get_success( + self.hs.get_datastores().main.get_devices_by_user("@username:test") + ) + + self.assertEqual(len(devices), 0) + class RefreshAuthTests(unittest.HomeserverTestCase): servlets = [ diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index e2a4d98275..e801ba8c8b 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -23,6 +23,8 @@ from twisted.test.proto_helpers import MemoryReactor from twisted.web.resource import Resource import synapse.rest.admin +from synapse.api.constants import ApprovalNoticeMedium, LoginType +from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client import devices, login, logout, register from synapse.rest.client.account import WhoamiRestServlet @@ -94,6 +96,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): logout.register_servlets, devices.register_servlets, lambda hs, http_server: WhoamiRestServlet(hs).register(http_server), + register.register_servlets, ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: @@ -406,6 +409,44 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 400) self.assertEqual(channel.json_body["errcode"], "M_INVALID_PARAM") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_require_approval(self) -> None: + channel = self.make_request( + "POST", + "register", + { + "username": "kermit", + "password": "monkey", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + + params = { + "type": LoginType.PASSWORD, + "identifier": {"type": "m.id.user", "user": "kermit"}, + "password": "monkey", + } + channel = self.make_request("POST", LOGIN_URL, params) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + @skip_unless(has_saml2 and HAS_OIDC, "Requires SAML2 and OIDC") class MultiSSOTestCase(unittest.HomeserverTestCase): diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index b781875d52..11cf3939d8 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -22,7 +22,11 @@ import pkg_resources from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType +from synapse.api.constants import ( + APP_SERVICE_REGISTRATION_TYPE, + ApprovalNoticeMedium, + LoginType, +) from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client import account, account_validity, login, logout, register, sync @@ -765,6 +769,32 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 400, channel.json_body) self.assertEqual(channel.json_body["errcode"], Codes.USER_IN_USE) + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_require_approval(self) -> None: + channel = self.make_request( + "POST", + "register", + { + "username": "kermit", + "password": "monkey", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + class AccountValidityTestCase(unittest.HomeserverTestCase): diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index dd26145bf8..c249a42bb6 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -543,8 +543,12 @@ class RestHelper: return channel.json_body - def login_via_oidc(self, remote_user_id: str) -> JsonDict: - """Log in (as a new user) via OIDC + def login_via_oidc( + self, + remote_user_id: str, + expected_status: int = 200, + ) -> JsonDict: + """Log in via OIDC Returns the result of the final token login. @@ -578,7 +582,9 @@ class RestHelper: "/login", content={"type": "m.login.token", "token": login_token}, ) - assert channel.code == HTTPStatus.OK + assert ( + channel.code == expected_status + ), f"unexpected status in response: {channel.code}" return channel.json_body def auth_via_oidc( diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 853a93afab..05ea802008 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -16,9 +16,10 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import UserTypes from synapse.api.errors import ThreepidValidationError from synapse.server import HomeServer +from synapse.types import JsonDict, UserID from synapse.util import Clock -from tests.unittest import HomeserverTestCase +from tests.unittest import HomeserverTestCase, override_config class RegistrationStoreTestCase(HomeserverTestCase): @@ -48,6 +49,7 @@ class RegistrationStoreTestCase(HomeserverTestCase): "user_type": None, "deactivated": 0, "shadow_banned": 0, + "approved": 1, }, (self.get_success(self.store.get_user_by_id(self.user_id))), ) @@ -166,3 +168,101 @@ class RegistrationStoreTestCase(HomeserverTestCase): ThreepidValidationError, ) self.assertEqual(e.value.msg, "Validation token not found or has expired", e) + + +class ApprovalRequiredRegistrationTestCase(HomeserverTestCase): + def default_config(self) -> JsonDict: + config = super().default_config() + + # If there's already some config for this feature in the default config, it + # means we're overriding it with @override_config. In this case we don't want + # to do anything more with it. + msc3866_config = config.get("experimental_features", {}).get("msc3866") + if msc3866_config is not None: + return config + + # Require approval for all new accounts. + config["experimental_features"] = { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + return config + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.user_id = "@my-user:test" + self.pwhash = "{xx1}123456789" + + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": False, + } + } + } + ) + def test_approval_not_required(self) -> None: + """Tests that if we don't require approval for new accounts, newly created + accounts are automatically marked as approved. + """ + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user is not None + self.assertTrue(user["approved"]) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) + + def test_approval_required(self) -> None: + """Tests that if we require approval for new accounts, newly created accounts + are not automatically marked as approved. + """ + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user is not None + self.assertFalse(user["approved"]) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertFalse(approved) + + def test_override(self) -> None: + """Tests that if we require approval for new accounts, but we explicitly say the + new user should be considered approved, they're marked as approved. + """ + self.get_success( + self.store.register_user( + self.user_id, + self.pwhash, + approved=True, + ) + ) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + self.assertIsNotNone(user) + assert user is not None + self.assertEqual(user["approved"], 1) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) + + def test_approve_user(self) -> None: + """Tests that approving the user updates their approval status.""" + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertFalse(approved) + + self.get_success( + self.store.update_user_approval_status( + UserID.from_string(self.user_id), True + ) + ) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) -- cgit 1.5.1 From 5a6d02524685187b8ed212b8e8027e4d15575fd0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Oct 2022 18:44:44 +0100 Subject: Clear out old rows from `event_push_actions_staging` (#14020) On matrix.org we have ~5 million stale rows in `event_push_actions_staging`, let's add a background job to make sure we clear them out. --- changelog.d/14020.misc | 1 + .../storage/databases/main/event_push_actions.py | 58 +++++++++++++++++++++- synapse/storage/schema/__init__.py | 1 + .../main/delta/73/05old_push_actions.sql.postgres | 22 ++++++++ .../main/delta/73/05old_push_actions.sql.sqlite | 24 +++++++++ 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14020.misc create mode 100644 synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres create mode 100644 synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/14020.misc b/changelog.d/14020.misc new file mode 100644 index 0000000000..85550b307d --- /dev/null +++ b/changelog.d/14020.misc @@ -0,0 +1 @@ +Clear out stale entries in `event_push_actions_staging` table. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3fdf128d9e..cdc9ee5a37 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -205,6 +205,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ): super().__init__(database, db_conn, hs) + # Track when the process started. + self._started_ts = self._clock.time_msec() + # These get correctly set by _find_stream_orderings_for_times_txn self.stream_ordering_month_ago: Optional[int] = None self.stream_ordering_day_ago: Optional[int] = None @@ -224,6 +227,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self._rotate_notifs, 30 * 1000 ) + self._clear_old_staging_loop = self._clock.looping_call( + self._clear_old_push_actions_staging, 30 * 60 * 1000 + ) + self.db_pool.updates.register_background_index_update( "event_push_summary_unique_index", index_name="event_push_summary_unique_index", @@ -791,7 +798,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( user_id: str, actions: Collection[Union[Mapping, str]] - ) -> Tuple[str, str, str, int, int, int, str]: + ) -> Tuple[str, str, str, int, int, int, str, int]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 return ( @@ -802,6 +809,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas is_highlight, # highlight column int(count_as_unread), # unread column thread_id, # thread_id column + self._clock.time_msec(), # inserted_ts column ) await self.db_pool.simple_insert_many( @@ -814,6 +822,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas "highlight", "unread", "thread_id", + "inserted_ts", ), values=[ _gen_entry(user_id, actions) @@ -1340,6 +1349,53 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas if done: break + @wrap_as_background_process("_clear_old_push_actions_staging") + async def _clear_old_push_actions_staging(self) -> None: + """Clear out any old event push actions from the staging table for + events that we failed to persist. + """ + + # We delete anything more than an hour old, on the assumption that we'll + # never take more than an hour to persist an event. + delete_before_ts = self._clock.time_msec() - 60 * 60 * 1000 + + if self._started_ts > delete_before_ts: + # We need to wait for at least an hour before we started deleting, + # so that we know it's safe to delete rows with NULL `inserted_ts`. + return + + # We don't have an index on `inserted_ts`, instead we assume that the + # number of "live" rows in `event_push_actions_staging` is small enough + # that an infrequent periodic scan won't cause a problem. + # + # Note: we also delete any columns with NULL `inserted_ts`, this is safe + # as we added a default value to new rows and so they must be at least + # an hour old. + limit = 1000 + sql = """ + DELETE FROM event_push_actions_staging WHERE event_id IN ( + SELECT event_id FROM event_push_actions_staging WHERE + inserted_ts < ? OR inserted_ts IS NULL + LIMIT ? + ) + """ + + def _clear_old_push_actions_staging_txn(txn: LoggingTransaction) -> bool: + txn.execute(sql, (delete_before_ts, limit)) + return txn.rowcount >= limit + + while True: + # Returns true if we have more stuff to delete from the table. + deleted = await self.db_pool.runInteraction( + "_clear_old_push_actions_staging", _clear_old_push_actions_staging_txn + ) + + if not deleted: + return + + # We sleep to ensure that we don't overwhelm the DB. + await self._clock.sleep(1.0) + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index f29424d17a..4a5c947699 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -85,6 +85,7 @@ Changes in SCHEMA_VERSION = 73; events over federation. - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`, `batch_events`) to make it easy to delete all associated rows when purging a room. + - `inserted_ts` column is added to `event_push_actions_staging` table. """ diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres new file mode 100644 index 0000000000..4af1a8470b --- /dev/null +++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres @@ -0,0 +1,22 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column so that we know when a push action was inserted, to make it +-- easier to clear out old ones. +ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT; + +-- We now add a default for *new* rows. We don't do this above as we don't want +-- to have to update every remove with the new default. +ALTER TABLE event_push_actions_staging ALTER COLUMN inserted_ts SET DEFAULT extract(epoch from now()) * 1000; diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite new file mode 100644 index 0000000000..7482dabba2 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite @@ -0,0 +1,24 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- On SQLite we must be in monolith mode and updating the database from Synapse, +-- so its safe to assume that `event_push_actions_staging` should be empty (as +-- over restart an event must either have been fully persisted or we'll +-- recalculate the push actions) +DELETE FROM event_push_actions_staging; + +-- Add a column so that we know when a push action was inserted, to make it +-- easier to clear out old ones. +ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT; -- cgit 1.5.1 From b4ec4f5e71a87d5bdc840a4220dfd9a34c54c847 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 4 Oct 2022 09:47:04 -0400 Subject: Track notification counts per thread (implement MSC3773). (#13776) When retrieving counts of notifications segment the results based on the thread ID, but choose whether to return them as individual threads or as a single summed field by letting the client opt-in via a sync flag. The summarization code is also updated to be per thread, instead of per room. --- changelog.d/13776.feature | 1 + synapse/api/constants.py | 3 + synapse/api/filtering.py | 10 ++ synapse/config/experimental.py | 2 + synapse/handlers/sync.py | 40 ++++- synapse/push/bulk_push_rule_evaluator.py | 4 +- synapse/push/push_tools.py | 9 +- synapse/rest/client/sync.py | 4 + synapse/rest/client/versions.py | 3 +- synapse/storage/database.py | 2 +- .../storage/databases/main/event_push_actions.py | 188 +++++++++++++-------- synapse/storage/schema/__init__.py | 6 +- .../delta/73/06thread_notifications_backfill.sql | 29 ++++ .../07thread_notifications_not_null.sql.postgres | 19 +++ .../73/07thread_notifications_not_null.sql.sqlite | 101 +++++++++++ tests/replication/slave/storage/test_events.py | 17 +- tests/storage/test_event_push_actions.py | 169 +++++++++++++++++- 17 files changed, 514 insertions(+), 93 deletions(-) create mode 100644 changelog.d/13776.feature create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/13776.feature b/changelog.d/13776.feature new file mode 100644 index 0000000000..22bce125ce --- /dev/null +++ b/changelog.d/13776.feature @@ -0,0 +1 @@ +Experimental support for thread-specific notifications ([MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c031903b1a..44c5ffc6a5 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -31,6 +31,9 @@ MAX_ALIAS_LENGTH = 255 # the maximum length for a user id is 255 characters MAX_USERID_LENGTH = 255 +# Constant value used for the pseudo-thread which is the main timeline. +MAIN_TIMELINE: Final = "main" + class Membership: diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index f7f46f8d80..c6e44dcf82 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -84,6 +84,7 @@ ROOM_EVENT_FILTER_SCHEMA = { "contains_url": {"type": "boolean"}, "lazy_load_members": {"type": "boolean"}, "include_redundant_members": {"type": "boolean"}, + "org.matrix.msc3773.unread_thread_notifications": {"type": "boolean"}, # Include or exclude events with the provided labels. # cf https://github.com/matrix-org/matrix-doc/pull/2326 "org.matrix.labels": {"type": "array", "items": {"type": "string"}}, @@ -240,6 +241,9 @@ class FilterCollection: def include_redundant_members(self) -> bool: return self._room_state_filter.include_redundant_members + def unread_thread_notifications(self) -> bool: + return self._room_timeline_filter.unread_thread_notifications + async def filter_presence( self, events: Iterable[UserPresenceState] ) -> List[UserPresenceState]: @@ -304,6 +308,12 @@ class Filter: self.include_redundant_members = filter_json.get( "include_redundant_members", False ) + if hs.config.experimental.msc3773_enabled: + self.unread_thread_notifications: bool = filter_json.get( + "org.matrix.msc3773.unread_thread_notifications", False + ) + else: + self.unread_thread_notifications = False self.types = filter_json.get("types", None) self.not_types = filter_json.get("not_types", []) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 83695f24d9..6503ce6e34 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -99,6 +99,8 @@ class ExperimentalConfig(Config): self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False) # MSC3772: A push rule for mutual relations. self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False) + # MSC3773: Thread notifications + self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False) # MSC3715: dir param on /relations. self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4abb9b6127..329e89c604 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -40,7 +40,7 @@ from synapse.handlers.relations import BundledAggregations from synapse.logging.context import current_context from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span from synapse.push.clientformat import format_push_rules_for_user -from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.roommember import MemberSummary from synapse.storage.state import StateFilter from synapse.types import ( @@ -128,6 +128,7 @@ class JoinedSyncResult: ephemeral: List[JsonDict] account_data: List[JsonDict] unread_notifications: JsonDict + unread_thread_notifications: JsonDict summary: Optional[JsonDict] unread_count: int @@ -278,6 +279,8 @@ class SyncHandler: self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync + self._msc3773_enabled = hs.config.experimental.msc3773_enabled + async def wait_for_sync_for_user( self, requester: Requester, @@ -1288,7 +1291,7 @@ class SyncHandler: async def unread_notifs_for_room_id( self, room_id: str, sync_config: SyncConfig - ) -> NotifCounts: + ) -> RoomNotifCounts: with Measure(self.clock, "unread_notifs_for_room_id"): return await self.store.get_unread_event_push_actions_by_room_for_user( @@ -2353,6 +2356,7 @@ class SyncHandler: ephemeral=ephemeral, account_data=account_data_events, unread_notifications=unread_notifications, + unread_thread_notifications={}, summary=summary, unread_count=0, ) @@ -2360,10 +2364,36 @@ class SyncHandler: if room_sync or always_include: notifs = await self.unread_notifs_for_room_id(room_id, sync_config) - unread_notifications["notification_count"] = notifs.notify_count - unread_notifications["highlight_count"] = notifs.highlight_count + # Notifications for the main timeline. + notify_count = notifs.main_timeline.notify_count + highlight_count = notifs.main_timeline.highlight_count + unread_count = notifs.main_timeline.unread_count - room_sync.unread_count = notifs.unread_count + # Check the sync configuration. + if ( + self._msc3773_enabled + and sync_config.filter_collection.unread_thread_notifications() + ): + # And add info for each thread. + room_sync.unread_thread_notifications = { + thread_id: { + "notification_count": thread_notifs.notify_count, + "highlight_count": thread_notifs.highlight_count, + } + for thread_id, thread_notifs in notifs.threads.items() + if thread_id is not None + } + + else: + # Combine the unread counts for all threads and main timeline. + for thread_notifs in notifs.threads.values(): + notify_count += thread_notifs.notify_count + highlight_count += thread_notifs.highlight_count + unread_count += thread_notifs.unread_count + + unread_notifications["notification_count"] = notify_count + unread_notifications["highlight_count"] = highlight_count + room_sync.unread_count = unread_count sync_result_builder.joined.append(room_sync) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 4270438918..61d952742d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -31,7 +31,7 @@ from typing import ( from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership, RelationTypes +from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -280,7 +280,7 @@ class BulkPushRuleEvaluator: # If the event does not have a relation, then cannot have any mutual # relations or thread ID. relations = {} - thread_id = "main" + thread_id = MAIN_TIMELINE if relation: relations = await self._get_mutual_relations( relation.parent_id, diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index 658bf373b7..edeba27a45 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -39,7 +39,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - await concurrently_execute(get_room_unread_count, joins, 10) for notifs in room_notifs: - if notifs.notify_count == 0: + # Combine the counts from all the threads. + notify_count = notifs.main_timeline.notify_count + sum( + n.notify_count for n in notifs.threads.values() + ) + + if notify_count == 0: continue if group_by_room: @@ -47,7 +52,7 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - badge += 1 else: # increment the badge count by the number of unread messages in the room - badge += notifs.notify_count + badge += notify_count return badge diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index c2989765ce..f1c23d68e5 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -509,6 +509,10 @@ class SyncRestServlet(RestServlet): ephemeral_events = room.ephemeral result["ephemeral"] = {"events": ephemeral_events} result["unread_notifications"] = room.unread_notifications + if room.unread_thread_notifications: + result[ + "org.matrix.msc3773.unread_thread_notifications" + ] = room.unread_thread_notifications result["summary"] = room.summary if self._msc2654_enabled: result["org.matrix.msc2654.unread_count"] = room.unread_count diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c95b0d6f19..280d306483 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -103,8 +103,9 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3030": self.config.experimental.msc3030_enabled, # Adds support for thread relations, per MSC3440. "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above - # Support for thread read receipts. + # Support for thread read receipts & notification counts. "org.matrix.msc3771": self.config.experimental.msc3771_enabled, + "org.matrix.msc3773": self.config.experimental.msc3773_enabled, # Allows moderators to fetch redacted event content as described in MSC2815 "fi.mau.msc2815": self.config.experimental.msc2815_enabled, # Adds support for login token requests as per MSC3882 diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b4469eb964..7bb21f8f81 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -94,7 +94,7 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { "event_search": "event_search_event_id_idx", "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx", "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx", - "event_push_summary": "event_push_summary_unique_index", + "event_push_summary": "event_push_summary_unique_index2", "receipts_linearized": "receipts_linearized_unique_index", "receipts_graph": "receipts_graph_unique_index", } diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index cdc9ee5a37..3210e9cca1 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -88,7 +88,7 @@ from typing import ( import attr -from synapse.api.constants import ReceiptTypes +from synapse.api.constants import MAIN_TIMELINE, ReceiptTypes from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -157,7 +157,7 @@ class UserPushAction(EmailPushAction): @attr.s(slots=True, auto_attribs=True) class NotifCounts: """ - The per-user, per-room count of notifications. Used by sync and push. + The per-user, per-room, per-thread count of notifications. Used by sync and push. """ notify_count: int = 0 @@ -165,6 +165,21 @@ class NotifCounts: highlight_count: int = 0 +@attr.s(slots=True, auto_attribs=True) +class RoomNotifCounts: + """ + The per-user, per-room count of notifications. Used by sync and push. + """ + + main_timeline: NotifCounts + # Map of thread ID to the notification counts. + threads: Dict[str, NotifCounts] + + def __len__(self) -> int: + # To properly account for the amount of space in any caches. + return len(self.threads) + 1 + + def _serialize_action( actions: Collection[Union[Mapping, str]], is_highlight: bool ) -> str: @@ -338,12 +353,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return result - @cached(tree=True, max_entries=5000) + @cached(tree=True, max_entries=5000, iterable=True) async def get_unread_event_push_actions_by_room_for_user( self, room_id: str, user_id: str, - ) -> NotifCounts: + ) -> RoomNotifCounts: """Get the notification count, the highlight count and the unread message count for a given user in a given room after their latest read receipt. @@ -356,8 +371,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: The user to retrieve the counts for. Returns - A NotifCounts object containing the notification count, the highlight count - and the unread message count. + A RoomNotifCounts object containing the notification count, the + highlight count and the unread message count for both the main timeline + and threads. """ return await self.db_pool.runInteraction( "get_unread_event_push_actions_by_room", @@ -371,7 +387,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: LoggingTransaction, room_id: str, user_id: str, - ) -> NotifCounts: + ) -> RoomNotifCounts: # Get the stream ordering of the user's latest receipt in the room. result = self.get_last_unthreaded_receipt_for_user_txn( txn, @@ -406,7 +422,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas room_id: str, user_id: str, receipt_stream_ordering: int, - ) -> NotifCounts: + ) -> RoomNotifCounts: """Get the number of unread messages for a user/room that have happened since the given stream ordering. @@ -418,12 +434,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas receipt in the room. If there are no receipts, the stream ordering of the user's join event. - Returns - A NotifCounts object containing the notification count, the highlight count - and the unread message count. + Returns: + A RoomNotifCounts object containing the notification count, the + highlight count and the unread message count for both the main timeline + and threads. """ - counts = NotifCounts() + main_counts = NotifCounts() + thread_counts: Dict[str, NotifCounts] = {} + + def _get_thread(thread_id: str) -> NotifCounts: + if thread_id == MAIN_TIMELINE: + return main_counts + return thread_counts.setdefault(thread_id, NotifCounts()) # First we pull the counts from the summary table. # @@ -440,52 +463,61 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # receipt). txn.execute( """ - SELECT stream_ordering, notif_count, COALESCE(unread_count, 0) + SELECT stream_ordering, notif_count, COALESCE(unread_count, 0), thread_id FROM event_push_summary WHERE room_id = ? AND user_id = ? AND ( (last_receipt_stream_ordering IS NULL AND stream_ordering > ?) OR last_receipt_stream_ordering = ? - ) + ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0) """, (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering), ) - row = txn.fetchone() - - summary_stream_ordering = 0 - if row: - summary_stream_ordering = row[0] - counts.notify_count += row[1] - counts.unread_count += row[2] + max_summary_stream_ordering = 0 + for summary_stream_ordering, notif_count, unread_count, thread_id in txn: + counts = _get_thread(thread_id) + counts.notify_count += notif_count + counts.unread_count += unread_count + + # Summaries will only be used if they have not been invalidated by + # a recent receipt; track the latest stream ordering or a valid summary. + # + # Note that since there's only one read receipt in the room per user, + # valid summaries are contiguous. + max_summary_stream_ordering = max( + summary_stream_ordering, max_summary_stream_ordering + ) # Next we need to count highlights, which aren't summarised sql = """ - SELECT COUNT(*) FROM event_push_actions + SELECT COUNT(*), thread_id FROM event_push_actions WHERE user_id = ? AND room_id = ? AND stream_ordering > ? AND highlight = 1 + GROUP BY thread_id """ txn.execute(sql, (user_id, room_id, receipt_stream_ordering)) - row = txn.fetchone() - if row: - counts.highlight_count += row[0] + for highlight_count, thread_id in txn: + _get_thread(thread_id).highlight_count += highlight_count # Finally we need to count push actions that aren't included in the # summary returned above. This might be due to recent events that haven't # been summarised yet or the summary is out of date due to a recent read # receipt. start_unread_stream_ordering = max( - receipt_stream_ordering, summary_stream_ordering + receipt_stream_ordering, max_summary_stream_ordering ) - notify_count, unread_count = self._get_notif_unread_count_for_user_room( + unread_counts = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, start_unread_stream_ordering ) - counts.notify_count += notify_count - counts.unread_count += unread_count + for notif_count, unread_count, thread_id in unread_counts: + counts = _get_thread(thread_id) + counts.notify_count += notif_count + counts.unread_count += unread_count - return counts + return RoomNotifCounts(main_counts, thread_counts) def _get_notif_unread_count_for_user_room( self, @@ -494,7 +526,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: str, stream_ordering: int, max_stream_ordering: Optional[int] = None, - ) -> Tuple[int, int]: + ) -> List[Tuple[int, int, str]]: """Returns the notify and unread counts from `event_push_actions` for the given user/room in the given range. @@ -510,13 +542,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas If this is not given, then no maximum is applied. Return: - A tuple of the notif count and unread count in the given range. + A tuple of the notif count and unread count in the given range for + each thread. """ # If there have been no events in the room since the stream ordering, # there can't be any push actions either. if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): - return 0, 0 + return [] clause = "" args = [user_id, room_id, stream_ordering] @@ -527,26 +560,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # If the max stream ordering is less than the min stream ordering, # then obviously there are zero push actions in that range. if max_stream_ordering <= stream_ordering: - return 0, 0 + return [] sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), - COUNT(CASE WHEN unread = 1 THEN 1 END) - FROM event_push_actions ea - WHERE user_id = ? + COUNT(CASE WHEN unread = 1 THEN 1 END), + thread_id + FROM event_push_actions ea + WHERE user_id = ? AND room_id = ? AND ea.stream_ordering > ? {clause} + GROUP BY thread_id """ txn.execute(sql, args) - row = txn.fetchone() - - if row: - return cast(Tuple[int, int], row) - - return 0, 0 + return cast(List[Tuple[int, int, str]], txn.fetchall()) async def get_push_action_users_in_range( self, min_stream_ordering: int, max_stream_ordering: int @@ -1099,26 +1129,34 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Fetch the notification counts between the stream ordering of the # latest receipt and what was previously summarised. - notif_count, unread_count = self._get_notif_unread_count_for_user_room( + unread_counts = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering ) - # Replace the previous summary with the new counts. - # - # TODO(threads): Upsert per-thread instead of setting them all to main. - self.db_pool.simple_upsert_txn( + # First mark the summary for all threads in the room as cleared. + self.db_pool.simple_update_txn( txn, table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id}, - values={ - "notif_count": notif_count, - "unread_count": unread_count, + keyvalues={"user_id": user_id, "room_id": room_id}, + updatevalues={ + "notif_count": 0, + "unread_count": 0, "stream_ordering": old_rotate_stream_ordering, "last_receipt_stream_ordering": stream_ordering, - "thread_id": "main", }, ) + # Then any updated threads get their notification count and unread + # count updated. + self.db_pool.simple_update_many_txn( + txn, + table="event_push_summary", + key_names=("room_id", "user_id", "thread_id"), + key_values=[(room_id, user_id, row[2]) for row in unread_counts], + value_names=("notif_count", "unread_count"), + value_values=[(row[0], row[1]) for row in unread_counts], + ) + # We always update `event_push_summary_last_receipt_stream_id` to # ensure that we don't rescan the same receipts for remote users. @@ -1204,23 +1242,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Calculate the new counts that should be upserted into event_push_summary sql = """ - SELECT user_id, room_id, + SELECT user_id, room_id, thread_id, coalesce(old.%s, 0) + upd.cnt, upd.stream_ordering FROM ( - SELECT user_id, room_id, count(*) as cnt, + SELECT user_id, room_id, thread_id, count(*) as cnt, max(ea.stream_ordering) as stream_ordering FROM event_push_actions AS ea - LEFT JOIN event_push_summary AS old USING (user_id, room_id) + LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id) WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ? AND ( old.last_receipt_stream_ordering IS NULL OR old.last_receipt_stream_ordering < ea.stream_ordering ) AND %s = 1 - GROUP BY user_id, room_id + GROUP BY user_id, room_id, thread_id ) AS upd - LEFT JOIN event_push_summary AS old USING (user_id, room_id) + LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id) """ # First get the count of unread messages. @@ -1234,11 +1272,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # object because we might not have the same amount of rows in each of them. To do # this, we use a dict indexed on the user ID and room ID to make it easier to # populate. - summaries: Dict[Tuple[str, str], _EventPushSummary] = {} + summaries: Dict[Tuple[str, str, str], _EventPushSummary] = {} for row in txn: - summaries[(row[0], row[1])] = _EventPushSummary( - unread_count=row[2], - stream_ordering=row[3], + summaries[(row[0], row[1], row[2])] = _EventPushSummary( + unread_count=row[3], + stream_ordering=row[4], notif_count=0, ) @@ -1249,34 +1287,35 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) for row in txn: - if (row[0], row[1]) in summaries: - summaries[(row[0], row[1])].notif_count = row[2] + if (row[0], row[1], row[2]) in summaries: + summaries[(row[0], row[1], row[2])].notif_count = row[3] else: # Because the rules on notifying are different than the rules on marking # a message unread, we might end up with messages that notify but aren't # marked unread, so we might not have a summary for this (user, room) # tuple to complete. - summaries[(row[0], row[1])] = _EventPushSummary( + summaries[(row[0], row[1], row[2])] = _EventPushSummary( unread_count=0, - stream_ordering=row[3], - notif_count=row[2], + stream_ordering=row[4], + notif_count=row[3], ) logger.info("Rotating notifications, handling %d rows", len(summaries)) - # TODO(threads): Update on a per-thread basis. self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - key_names=("user_id", "room_id"), - key_values=[(user_id, room_id) for user_id, room_id in summaries], - value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"), + key_names=("user_id", "room_id", "thread_id"), + key_values=[ + (user_id, room_id, thread_id) + for user_id, room_id, thread_id in summaries + ], + value_names=("notif_count", "unread_count", "stream_ordering"), value_values=[ ( summary.notif_count, summary.unread_count, summary.stream_ordering, - "main", ) for summary in summaries.values() ], @@ -1288,7 +1327,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) async def _remove_old_push_actions_that_have_rotated(self) -> None: - """Clear out old push actions that have been summarised.""" + """ + Clear out old push actions that have been summarised (and are older than + 1 day ago). + """ # We want to clear out anything that is older than a day that *has* already # been rotated. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 4a5c947699..19dbf2da7f 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -90,9 +90,9 @@ Changes in SCHEMA_VERSION = 73; SCHEMA_COMPAT_VERSION = ( - # The groups tables are no longer accessible, so synapses with SCHEMA_VERSION < 72 - # could break. - 72 + # The threads_id column must exist for event_push_actions, event_push_summary, + # receipts_linearized, and receipts_graph. + 73 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql new file mode 100644 index 0000000000..0ffde9bbeb --- /dev/null +++ b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql @@ -0,0 +1,29 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Forces the background updates from 06thread_notifications.sql to run in the +-- foreground as code will now require those to be "done". + +DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id'; + +-- Overwrite any null thread_id columns. +UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL; +UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL; +UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL; + +-- Do not run the event_push_summary_unique_index job if it is pending; the +-- thread_id field will be made required. +DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index'; +DROP INDEX IF EXISTS event_push_summary_unique_index; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres new file mode 100644 index 0000000000..33674f8c62 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- The columns can now be made non-nullable. +ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL; +ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL; +ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite new file mode 100644 index 0000000000..5322ad77a4 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite @@ -0,0 +1,101 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- SQLite doesn't support modifying columns to an existing table, so it must +-- be recreated. + +-- Create the new tables. +CREATE TABLE event_push_actions_staging_new ( + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + actions TEXT NOT NULL, + notif SMALLINT NOT NULL, + highlight SMALLINT NOT NULL, + unread SMALLINT, + thread_id TEXT NOT NULL, + inserted_ts BIGINT +); + +CREATE TABLE event_push_actions_new ( + room_id TEXT NOT NULL, + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + profile_tag VARCHAR(32), + actions TEXT NOT NULL, + topological_ordering BIGINT, + stream_ordering BIGINT, + notif SMALLINT, + highlight SMALLINT, + unread SMALLINT, + thread_id TEXT NOT NULL, + CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) +); + +CREATE TABLE event_push_summary_new ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + notif_count BIGINT NOT NULL, + stream_ordering BIGINT NOT NULL, + unread_count BIGINT, + last_receipt_stream_ordering BIGINT, + thread_id TEXT NOT NULL +); + +-- Swap the indexes. +DROP INDEX IF EXISTS event_push_actions_staging_id; +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id); + +DROP INDEX IF EXISTS event_push_actions_room_id_user_id; +DROP INDEX IF EXISTS event_push_actions_rm_tokens; +DROP INDEX IF EXISTS event_push_actions_stream_ordering; +DROP INDEX IF EXISTS event_push_actions_u_highlight; +DROP INDEX IF EXISTS event_push_actions_highlights_index; +CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id); +CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering ); +CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id ); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering); + +-- Copy the data. +INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts) + SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts + FROM event_push_actions_staging; + +INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id) + SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id + FROM event_push_actions; + +INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id) + SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id + FROM event_push_summary; + +-- Drop the old tables. +DROP TABLE event_push_actions_staging; +DROP TABLE event_push_actions; +DROP TABLE event_push_summary; + +-- Rename the tables. +ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging; +ALTER TABLE event_push_actions_new RENAME TO event_push_actions; +ALTER TABLE event_push_summary_new RENAME TO event_push_summary; + +-- Re-run background updates from 72/02event_push_actions_index.sql and +-- 72/06thread_notifications.sql. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7307, 'event_push_summary_unique_index2', '{}') + ON CONFLICT (update_name) DO NOTHING; +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7307, 'event_push_actions_stream_highlight_index', '{}') + ON CONFLICT (update_name) DO NOTHING; diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index efd92793c0..d42e36cdf1 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -22,7 +22,10 @@ from synapse.api.room_versions import RoomVersions from synapse.events import FrozenEvent, _EventInternalMetadata, make_event_from_dict from synapse.handlers.room import RoomEventSource from synapse.replication.slave.storage.events import SlavedEventStore -from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.storage.databases.main.event_push_actions import ( + NotifCounts, + RoomNotifCounts, +) from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser from synapse.types import PersistedEventPosition @@ -178,7 +181,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=0, unread_count=0, notify_count=0), + RoomNotifCounts( + NotifCounts(highlight_count=0, unread_count=0, notify_count=0), {} + ), ) self.persist( @@ -191,7 +196,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=0, unread_count=0, notify_count=1), + RoomNotifCounts( + NotifCounts(highlight_count=0, unread_count=0, notify_count=1), {} + ), ) self.persist( @@ -206,7 +213,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=1, unread_count=0, notify_count=2), + RoomNotifCounts( + NotifCounts(highlight_count=1, unread_count=0, notify_count=2), {} + ), ) def test_get_rooms_for_user_with_stream_ordering(self): diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 473c965e19..89f986ac34 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Tuple +from typing import Optional, Tuple from twisted.test.proto_helpers import MemoryReactor @@ -20,6 +20,7 @@ from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.types import JsonDict from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -133,13 +134,14 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) ) self.assertEqual( - counts, + counts.main_timeline, NotifCounts( notify_count=noitf_count, unread_count=0, highlight_count=highlight_count, ), ) + self.assertEqual(counts.threads, {}) def _create_event(highlight: bool = False) -> str: result = self.helper.send_event( @@ -186,6 +188,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(0, 0) _create_event() + _assert_counts(1, 0) _rotate() _assert_counts(1, 0) @@ -236,6 +239,168 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _rotate() _assert_counts(0, 0) + def test_count_aggregation_threads(self) -> None: + """ + This is essentially the same test as test_count_aggregation, but adds + events to the main timeline and to a thread. + """ + + user_id, token, _, other_token, room_id = self._create_users_and_room() + thread_id: str + + last_event_id: str + + def _assert_counts( + noitf_count: int, + highlight_count: int, + thread_notif_count: int, + thread_highlight_count: int, + ) -> None: + counts = self.get_success( + self.store.db_pool.runInteraction( + "get-unread-counts", + self.store._get_unread_counts_by_receipt_txn, + room_id, + user_id, + ) + ) + self.assertEqual( + counts.main_timeline, + NotifCounts( + notify_count=noitf_count, + unread_count=0, + highlight_count=highlight_count, + ), + ) + if thread_notif_count or thread_highlight_count: + self.assertEqual( + counts.threads, + { + thread_id: NotifCounts( + notify_count=thread_notif_count, + unread_count=0, + highlight_count=thread_highlight_count, + ), + }, + ) + else: + self.assertEqual(counts.threads, {}) + + def _create_event( + highlight: bool = False, thread_id: Optional[str] = None + ) -> str: + content: JsonDict = { + "msgtype": "m.text", + "body": user_id if highlight else "msg", + } + if thread_id: + content["m.relates_to"] = { + "rel_type": "m.thread", + "event_id": thread_id, + } + + result = self.helper.send_event( + room_id, + type="m.room.message", + content=content, + tok=other_token, + ) + nonlocal last_event_id + last_event_id = result["event_id"] + return last_event_id + + def _rotate() -> None: + self.get_success(self.store._rotate_notifs()) + + def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None: + self.get_success( + self.store.insert_receipt( + room_id, + "m.read", + user_id=user_id, + event_ids=[event_id], + thread_id=thread_id, + data={}, + ) + ) + + _assert_counts(0, 0, 0, 0) + thread_id = _create_event() + _assert_counts(1, 0, 0, 0) + _rotate() + _assert_counts(1, 0, 0, 0) + + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + _create_event() + _assert_counts(2, 0, 1, 0) + _rotate() + _assert_counts(2, 0, 1, 0) + + event_id = _create_event(thread_id=thread_id) + _assert_counts(2, 0, 2, 0) + _rotate() + _assert_counts(2, 0, 2, 0) + + _create_event() + _create_event(thread_id=thread_id) + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event() + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + # Delete old event push actions, this should not affect the (summarised) count. + self.get_success(self.store._remove_old_push_actions_that_have_rotated()) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _assert_counts(1, 1, 0, 0) + _rotate() + _assert_counts(1, 1, 0, 0) + + event_id = _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _rotate() + _assert_counts(1, 1, 1, 1) + + # Check that adding another notification and rotating after highlight + # works. + _create_event() + _rotate() + _assert_counts(2, 1, 1, 1) + + _create_event(thread_id=thread_id) + _rotate() + _assert_counts(2, 1, 2, 1) + + # Check that sending read receipts at different points results in the + # right counts. + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + _rotate() + _assert_counts(0, 0, 0, 0) + def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: self.get_success( -- cgit 1.5.1 From a7ba457b2b967ca098792d742bc304604b1824b7 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 4 Oct 2022 10:46:42 -0400 Subject: Mark events as read using threaded read receipts from MSC3771. (#13877) Applies the proper logic for unthreaded and threaded receipts to either apply to all events in the room or only events in the same thread, respectively. --- changelog.d/13877.feature | 1 + .../storage/databases/main/event_push_actions.py | 277 ++++++++++++++++----- .../73/08thread_receipts_non_null.sql.postgres | 23 ++ .../delta/73/08thread_receipts_non_null.sql.sqlite | 76 ++++++ tests/storage/test_event_push_actions.py | 189 +++++++++++++- 5 files changed, 504 insertions(+), 62 deletions(-) create mode 100644 changelog.d/13877.feature create mode 100644 synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres create mode 100644 synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/13877.feature b/changelog.d/13877.feature new file mode 100644 index 0000000000..d0cb902dff --- /dev/null +++ b/changelog.d/13877.feature @@ -0,0 +1 @@ +Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)). diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3210e9cca1..7469cd336c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -421,7 +421,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: LoggingTransaction, room_id: str, user_id: str, - receipt_stream_ordering: int, + unthreaded_receipt_stream_ordering: int, ) -> RoomNotifCounts: """Get the number of unread messages for a user/room that have happened since the given stream ordering. @@ -430,9 +430,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: The database transaction. room_id: The room ID to get unread counts for. user_id: The user ID to get unread counts for. - receipt_stream_ordering: The stream ordering of the user's latest - receipt in the room. If there are no receipts, the stream ordering - of the user's join event. + unthreaded_receipt_stream_ordering: The stream ordering of the user's latest + unthreaded receipt in the room. If there are no unthreaded receipts, + the stream ordering of the user's join event. Returns: A RoomNotifCounts object containing the notification count, the @@ -448,71 +448,181 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return main_counts return thread_counts.setdefault(thread_id, NotifCounts()) + receipt_types_clause, receipts_args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), + ) + # First we pull the counts from the summary table. # - # We check that `last_receipt_stream_ordering` matches the stream - # ordering given. If it doesn't match then a new read receipt has arrived and - # we haven't yet updated the counts in `event_push_summary` to reflect - # that; in that case we simply ignore `event_push_summary` counts - # and do a manual count of all of the rows in the `event_push_actions` table - # for this user/room. + # We check that `last_receipt_stream_ordering` matches the stream ordering of the + # latest receipt for the thread (which may be either the unthreaded read receipt + # or the threaded read receipt). # - # If `last_receipt_stream_ordering` is null then that means it's up to - # date (as the row was written by an older version of Synapse that + # If it doesn't match then a new read receipt has arrived and we haven't yet + # updated the counts in `event_push_summary` to reflect that; in that case we + # simply ignore `event_push_summary` counts. + # + # We then do a manual count of all the rows in the `event_push_actions` table + # for any user/room/thread which did not have a valid summary found. + # + # If `last_receipt_stream_ordering` is null then that means it's up-to-date + # (as the row was written by an older version of Synapse that # updated `event_push_summary` synchronously when persisting a new read # receipt). txn.execute( - """ - SELECT stream_ordering, notif_count, COALESCE(unread_count, 0), thread_id + f""" + SELECT notif_count, COALESCE(unread_count, 0), thread_id FROM event_push_summary + LEFT JOIN ( + SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + FROM receipts_linearized + LEFT JOIN events USING (room_id, event_id) + WHERE + user_id = ? + AND room_id = ? + AND stream_ordering > ? + AND {receipt_types_clause} + GROUP BY thread_id + ) AS receipts USING (thread_id) WHERE room_id = ? AND user_id = ? AND ( - (last_receipt_stream_ordering IS NULL AND stream_ordering > ?) - OR last_receipt_stream_ordering = ? + (last_receipt_stream_ordering IS NULL AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?)) + OR last_receipt_stream_ordering = COALESCE(threaded_receipt_stream_ordering, ?) ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0) """, - (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering), + ( + user_id, + room_id, + unthreaded_receipt_stream_ordering, + *receipts_args, + room_id, + user_id, + unthreaded_receipt_stream_ordering, + unthreaded_receipt_stream_ordering, + ), ) - max_summary_stream_ordering = 0 - for summary_stream_ordering, notif_count, unread_count, thread_id in txn: + summarised_threads = set() + for notif_count, unread_count, thread_id in txn: + summarised_threads.add(thread_id) counts = _get_thread(thread_id) counts.notify_count += notif_count counts.unread_count += unread_count - # Summaries will only be used if they have not been invalidated by - # a recent receipt; track the latest stream ordering or a valid summary. - # - # Note that since there's only one read receipt in the room per user, - # valid summaries are contiguous. - max_summary_stream_ordering = max( - summary_stream_ordering, max_summary_stream_ordering - ) - # Next we need to count highlights, which aren't summarised - sql = """ + sql = f""" SELECT COUNT(*), thread_id FROM event_push_actions + LEFT JOIN ( + SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + FROM receipts_linearized + LEFT JOIN events USING (room_id, event_id) + WHERE + user_id = ? + AND room_id = ? + AND stream_ordering > ? + AND {receipt_types_clause} + GROUP BY thread_id + ) AS receipts USING (thread_id) WHERE user_id = ? AND room_id = ? - AND stream_ordering > ? + AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?) AND highlight = 1 GROUP BY thread_id """ - txn.execute(sql, (user_id, room_id, receipt_stream_ordering)) + txn.execute( + sql, + ( + user_id, + room_id, + unthreaded_receipt_stream_ordering, + *receipts_args, + user_id, + room_id, + unthreaded_receipt_stream_ordering, + ), + ) for highlight_count, thread_id in txn: _get_thread(thread_id).highlight_count += highlight_count + # For threads which were summarised we need to count actions since the last + # rotation. + thread_id_clause, thread_id_args = make_in_list_sql_clause( + self.database_engine, "thread_id", summarised_threads + ) + + # The (inclusive) event stream ordering that was previously summarised. + rotated_upto_stream_ordering = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_stream_ordering", + keyvalues={}, + retcol="stream_ordering", + ) + + unread_counts = self._get_notif_unread_count_for_user_room( + txn, room_id, user_id, rotated_upto_stream_ordering + ) + for notif_count, unread_count, thread_id in unread_counts: + if thread_id not in summarised_threads: + continue + + if thread_id == MAIN_TIMELINE: + counts.notify_count += notif_count + counts.unread_count += unread_count + elif thread_id in thread_counts: + thread_counts[thread_id].notify_count += notif_count + thread_counts[thread_id].unread_count += unread_count + else: + # Previous thread summaries of 0 are discarded above. + # + # TODO If empty summaries are deleted this can be removed. + thread_counts[thread_id] = NotifCounts( + notify_count=notif_count, + unread_count=unread_count, + highlight_count=0, + ) + # Finally we need to count push actions that aren't included in the # summary returned above. This might be due to recent events that haven't # been summarised yet or the summary is out of date due to a recent read # receipt. - start_unread_stream_ordering = max( - receipt_stream_ordering, max_summary_stream_ordering - ) - unread_counts = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, start_unread_stream_ordering + sql = f""" + SELECT + COUNT(CASE WHEN notif = 1 THEN 1 END), + COUNT(CASE WHEN unread = 1 THEN 1 END), + thread_id + FROM event_push_actions + LEFT JOIN ( + SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + FROM receipts_linearized + LEFT JOIN events USING (room_id, event_id) + WHERE + user_id = ? + AND room_id = ? + AND stream_ordering > ? + AND {receipt_types_clause} + GROUP BY thread_id + ) AS receipts USING (thread_id) + WHERE user_id = ? + AND room_id = ? + AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?) + AND NOT {thread_id_clause} + GROUP BY thread_id + """ + txn.execute( + sql, + ( + user_id, + room_id, + unthreaded_receipt_stream_ordering, + *receipts_args, + user_id, + room_id, + unthreaded_receipt_stream_ordering, + *thread_id_args, + ), ) - - for notif_count, unread_count, thread_id in unread_counts: + for notif_count, unread_count, thread_id in txn: counts = _get_thread(thread_id) counts.notify_count += notif_count counts.unread_count += unread_count @@ -526,6 +636,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: str, stream_ordering: int, max_stream_ordering: Optional[int] = None, + thread_id: Optional[str] = None, ) -> List[Tuple[int, int, str]]: """Returns the notify and unread counts from `event_push_actions` for the given user/room in the given range. @@ -540,6 +651,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas stream_ordering: The (exclusive) minimum stream ordering to consider. max_stream_ordering: The (inclusive) maximum stream ordering to consider. If this is not given, then no maximum is applied. + thread_id: The thread ID to fetch unread counts for. If this is not provided + then the results for *all* threads is returned. + + Note that if this is provided the resulting list will only have 0 or + 1 tuples in it. Return: A tuple of the notif count and unread count in the given range for @@ -551,10 +667,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): return [] - clause = "" + stream_ordering_clause = "" args = [user_id, room_id, stream_ordering] if max_stream_ordering is not None: - clause = "AND ea.stream_ordering <= ?" + stream_ordering_clause = "AND ea.stream_ordering <= ?" args.append(max_stream_ordering) # If the max stream ordering is less than the min stream ordering, @@ -562,6 +678,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas if max_stream_ordering <= stream_ordering: return [] + # Either limit the results to a specific thread or fetch all threads. + thread_id_clause = "" + if thread_id is not None: + thread_id_clause = "AND thread_id = ?" + args.append(thread_id) + sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), @@ -571,7 +693,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas WHERE user_id = ? AND room_id = ? AND ea.stream_ordering > ? - {clause} + {stream_ordering_clause} + {thread_id_clause} GROUP BY thread_id """ @@ -1086,7 +1209,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) sql = """ - SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering + SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, e.stream_ordering FROM receipts_linearized AS r INNER JOIN events AS e USING (event_id) WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ? @@ -1107,45 +1230,69 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas limit, ), ) - rows = cast(List[Tuple[int, str, str, int]], txn.fetchall()) + rows = cast(List[Tuple[int, str, str, Optional[str], int]], txn.fetchall()) # For each new read receipt we delete push actions from before it and # recalculate the summary. - for _, room_id, user_id, stream_ordering in rows: + # + # Care must be taken of whether it is a threaded or unthreaded receipt. + for _, room_id, user_id, thread_id, stream_ordering in rows: # Only handle our own read receipts. if not self.hs.is_mine_id(user_id): continue + thread_clause = "" + thread_args: Tuple = () + if thread_id is not None: + thread_clause = "AND thread_id = ?" + thread_args = (thread_id,) + + # For each new read receipt we delete push actions from before it and + # recalculate the summary. txn.execute( - """ + f""" DELETE FROM event_push_actions WHERE room_id = ? AND user_id = ? AND stream_ordering <= ? AND highlight = 0 + {thread_clause} """, - (room_id, user_id, stream_ordering), + (room_id, user_id, stream_ordering, *thread_args), ) # Fetch the notification counts between the stream ordering of the # latest receipt and what was previously summarised. unread_counts = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering - ) - - # First mark the summary for all threads in the room as cleared. - self.db_pool.simple_update_txn( txn, - table="event_push_summary", - keyvalues={"user_id": user_id, "room_id": room_id}, - updatevalues={ - "notif_count": 0, - "unread_count": 0, - "stream_ordering": old_rotate_stream_ordering, - "last_receipt_stream_ordering": stream_ordering, - }, + room_id, + user_id, + stream_ordering, + old_rotate_stream_ordering, + thread_id, ) + # For an unthreaded receipt, mark the summary for all threads in the room + # as cleared. + if thread_id is None: + self.db_pool.simple_update_txn( + txn, + table="event_push_summary", + keyvalues={"user_id": user_id, "room_id": room_id}, + updatevalues={ + "notif_count": 0, + "unread_count": 0, + "stream_ordering": old_rotate_stream_ordering, + "last_receipt_stream_ordering": stream_ordering, + }, + ) + + # For a threaded receipt, we *always* want to update that receipt, + # event if there are no new notifications in that thread. This ensures + # the stream_ordering & last_receipt_stream_ordering are updated. + elif not unread_counts: + unread_counts = [(0, 0, thread_id)] + # Then any updated threads get their notification count and unread # count updated. self.db_pool.simple_update_many_txn( @@ -1153,8 +1300,16 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas table="event_push_summary", key_names=("room_id", "user_id", "thread_id"), key_values=[(room_id, user_id, row[2]) for row in unread_counts], - value_names=("notif_count", "unread_count"), - value_values=[(row[0], row[1]) for row in unread_counts], + value_names=( + "notif_count", + "unread_count", + "stream_ordering", + "last_receipt_stream_ordering", + ), + value_values=[ + (row[0], row[1], old_rotate_stream_ordering, stream_ordering) + for row in unread_counts + ], ) # We always update `event_push_summary_last_receipt_stream_id` to diff --git a/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres new file mode 100644 index 0000000000..3e0bc9e5eb --- /dev/null +++ b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop constraint on (room_id, receipt_type, user_id). + +-- Rebuild the unique constraint with the thread_id. +ALTER TABLE receipts_linearized + DROP CONSTRAINT receipts_linearized_uniqueness; + +ALTER TABLE receipts_graph + DROP CONSTRAINT receipts_graph_uniqueness; diff --git a/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite new file mode 100644 index 0000000000..e664889fbc --- /dev/null +++ b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite @@ -0,0 +1,76 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Drop constraint on (room_id, receipt_type, user_id). +-- +-- SQLite doesn't support modifying constraints to an existing table, so it must +-- be recreated. + +-- Create the new tables. +CREATE TABLE receipts_linearized_new ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + thread_id TEXT, + event_stream_ordering BIGINT, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +CREATE TABLE receipts_graph_new ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_ids TEXT NOT NULL, + thread_id TEXT, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); + +-- Drop the old indexes. +DROP INDEX IF EXISTS receipts_linearized_id; +DROP INDEX IF EXISTS receipts_linearized_room_stream; +DROP INDEX IF EXISTS receipts_linearized_user; + +-- Copy the data. +INSERT INTO receipts_linearized_new (stream_id, room_id, receipt_type, user_id, event_id, data) + SELECT stream_id, room_id, receipt_type, user_id, event_id, data + FROM receipts_linearized; +INSERT INTO receipts_graph_new (room_id, receipt_type, user_id, event_ids, data) + SELECT room_id, receipt_type, user_id, event_ids, data + FROM receipts_graph; + +-- Drop the old tables. +DROP TABLE receipts_linearized; +DROP TABLE receipts_graph; + +-- Rename the tables. +ALTER TABLE receipts_linearized_new RENAME TO receipts_linearized; +ALTER TABLE receipts_graph_new RENAME TO receipts_graph; + +-- Create the indices. +CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); + +-- Re-run background updates from 72/08thread_receipts.sql. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7308, 'receipts_linearized_unique_index', '{}') + ON CONFLICT (update_name) DO NOTHING; +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7308, 'receipts_graph_unique_index', '{}') + ON CONFLICT (update_name) DO NOTHING; diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 89f986ac34..6fa0cafb75 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -16,6 +16,7 @@ from typing import Optional, Tuple from twisted.test.proto_helpers import MemoryReactor +from synapse.api.constants import MAIN_TIMELINE from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -312,7 +313,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): def _rotate() -> None: self.get_success(self.store._rotate_notifs()) - def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None: + def _mark_read(event_id: str, thread_id: str = MAIN_TIMELINE) -> None: self.get_success( self.store.insert_receipt( room_id, @@ -348,9 +349,12 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _create_event() _create_event(thread_id=thread_id) _mark_read(event_id) + _assert_counts(1, 0, 3, 0) + _mark_read(event_id, thread_id) _assert_counts(1, 0, 1, 0) _mark_read(last_event_id) + _mark_read(last_event_id, thread_id) _assert_counts(0, 0, 0, 0) _create_event() @@ -364,6 +368,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(1, 0, 1, 0) _mark_read(last_event_id) + _mark_read(last_event_id, thread_id) _assert_counts(0, 0, 0, 0) _create_event(True) @@ -389,8 +394,190 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): # Check that sending read receipts at different points results in the # right counts. _mark_read(event_id) + _assert_counts(1, 0, 2, 1) + _mark_read(event_id, thread_id) _assert_counts(1, 0, 1, 0) _mark_read(last_event_id) + _assert_counts(0, 0, 1, 0) + _mark_read(last_event_id, thread_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _mark_read(last_event_id) + _mark_read(last_event_id, thread_id) + _assert_counts(0, 0, 0, 0) + _rotate() + _assert_counts(0, 0, 0, 0) + + def test_count_aggregation_mixed(self) -> None: + """ + This is essentially the same test as test_count_aggregation_threads, but + sends both unthreaded and threaded receipts. + """ + + # Create a user to receive notifications and send receipts. + user_id = self.register_user("user1235", "pass") + token = self.login("user1235", "pass") + + # And another users to send events. + other_id = self.register_user("other", "pass") + other_token = self.login("other", "pass") + + # Create a room and put both users in it. + room_id = self.helper.create_room_as(user_id, tok=token) + self.helper.join(room_id, other_id, tok=other_token) + thread_id: str + + last_event_id: str + + def _assert_counts( + noitf_count: int, + highlight_count: int, + thread_notif_count: int, + thread_highlight_count: int, + ) -> None: + counts = self.get_success( + self.store.db_pool.runInteraction( + "get-unread-counts", + self.store._get_unread_counts_by_receipt_txn, + room_id, + user_id, + ) + ) + self.assertEqual( + counts.main_timeline, + NotifCounts( + notify_count=noitf_count, + unread_count=0, + highlight_count=highlight_count, + ), + ) + if thread_notif_count or thread_highlight_count: + self.assertEqual( + counts.threads, + { + thread_id: NotifCounts( + notify_count=thread_notif_count, + unread_count=0, + highlight_count=thread_highlight_count, + ), + }, + ) + else: + self.assertEqual(counts.threads, {}) + + def _create_event( + highlight: bool = False, thread_id: Optional[str] = None + ) -> str: + content: JsonDict = { + "msgtype": "m.text", + "body": user_id if highlight else "msg", + } + if thread_id: + content["m.relates_to"] = { + "rel_type": "m.thread", + "event_id": thread_id, + } + + result = self.helper.send_event( + room_id, + type="m.room.message", + content=content, + tok=other_token, + ) + nonlocal last_event_id + last_event_id = result["event_id"] + return last_event_id + + def _rotate() -> None: + self.get_success(self.store._rotate_notifs()) + + def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None: + self.get_success( + self.store.insert_receipt( + room_id, + "m.read", + user_id=user_id, + event_ids=[event_id], + thread_id=thread_id, + data={}, + ) + ) + + _assert_counts(0, 0, 0, 0) + thread_id = _create_event() + _assert_counts(1, 0, 0, 0) + _rotate() + _assert_counts(1, 0, 0, 0) + + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + _create_event() + _assert_counts(2, 0, 1, 0) + _rotate() + _assert_counts(2, 0, 1, 0) + + event_id = _create_event(thread_id=thread_id) + _assert_counts(2, 0, 2, 0) + _rotate() + _assert_counts(2, 0, 2, 0) + + _create_event() + _create_event(thread_id=thread_id) + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id, MAIN_TIMELINE) + _mark_read(last_event_id, thread_id) + _assert_counts(0, 0, 0, 0) + + _create_event() + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + # Delete old event push actions, this should not affect the (summarised) count. + self.get_success(self.store._remove_old_push_actions_that_have_rotated()) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _assert_counts(1, 1, 0, 0) + _rotate() + _assert_counts(1, 1, 0, 0) + + event_id = _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _rotate() + _assert_counts(1, 1, 1, 1) + + # Check that adding another notification and rotating after highlight + # works. + _create_event() + _rotate() + _assert_counts(2, 1, 1, 1) + + _create_event(thread_id=thread_id) + _rotate() + _assert_counts(2, 1, 2, 1) + + # Check that sending read receipts at different points results in the + # right counts. + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + _mark_read(event_id, MAIN_TIMELINE) + _assert_counts(1, 0, 1, 0) + _mark_read(last_event_id, MAIN_TIMELINE) + _assert_counts(0, 0, 1, 0) + _mark_read(last_event_id, thread_id) _assert_counts(0, 0, 0, 0) _create_event(True) -- cgit 1.5.1 From 3bbe532abb7bfc41467597731ac1a18c0331f539 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 13 Oct 2022 08:02:11 -0400 Subject: Add an API for listing threads in a room. (#13394) Implement the /threads endpoint from MSC3856. This is currently unstable and behind an experimental configuration flag. It includes a background update to backfill data, results from the /threads endpoint will be partial until that finishes. --- changelog.d/13394.feature | 1 + synapse/_scripts/synapse_port_db.py | 2 + synapse/config/experimental.py | 3 + synapse/handlers/relations.py | 86 ++++++++++- synapse/rest/client/relations.py | 50 ++++++- synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 38 ++++- synapse/storage/databases/main/relations.py | 166 ++++++++++++++++++++- .../schema/main/delta/73/09threads_table.sql | 30 ++++ tests/rest/client/test_relations.py | 151 +++++++++++++++++++ 10 files changed, 522 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13394.feature create mode 100644 synapse/storage/schema/main/delta/73/09threads_table.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13394.feature b/changelog.d/13394.feature new file mode 100644 index 0000000000..68de079cf3 --- /dev/null +++ b/changelog.d/13394.feature @@ -0,0 +1 @@ +Experimental support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 5fa599e70e..d850e54e17 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -72,6 +72,7 @@ from synapse.storage.databases.main.registration import ( RegistrationBackgroundUpdateStore, find_max_generated_user_id_localpart, ) +from synapse.storage.databases.main.relations import RelationsWorkerStore from synapse.storage.databases.main.room import RoomBackgroundUpdateStore from synapse.storage.databases.main.roommember import RoomMemberBackgroundUpdateStore from synapse.storage.databases.main.search import SearchBackgroundUpdateStore @@ -206,6 +207,7 @@ class Store( PusherWorkerStore, PresenceBackgroundUpdateStore, ReceiptsBackgroundUpdateStore, + RelationsWorkerStore, ): def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index f44655516e..1860006536 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -101,6 +101,9 @@ class ExperimentalConfig(Config): # MSC3848: Introduce errcodes for specific event sending failures self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False) + # MSC3856: Threads list API + self.msc3856_enabled: bool = experimental.get("msc3856_enabled", False) + # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index cc5e45c241..1fdd7a10bc 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import enum import logging from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Tuple @@ -20,7 +21,7 @@ from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.events import EventBase, relation_from_event from synapse.logging.opentracing import trace -from synapse.storage.databases.main.relations import _RelatedEvent +from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, Requester, StreamToken, UserID from synapse.visibility import filter_events_for_client @@ -32,6 +33,13 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +class ThreadsListInclude(str, enum.Enum): + """Valid values for the 'include' flag of /threads.""" + + all = "all" + participated = "participated" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class _ThreadAggregation: # The latest event in the thread. @@ -482,3 +490,79 @@ class RelationsHandler: results.setdefault(event_id, BundledAggregations()).replace = edit return results + + async def get_threads( + self, + requester: Requester, + room_id: str, + include: ThreadsListInclude, + limit: int = 5, + from_token: Optional[ThreadsNextBatch] = None, + ) -> JsonDict: + """Get related events of a event, ordered by topological ordering. + + Args: + requester: The user requesting the relations. + room_id: The room the event belongs to. + include: One of "all" or "participated" to indicate which threads should + be returned. + limit: Only fetch the most recent `limit` events. + from_token: Fetch rows from the given token, or from the start if None. + + Returns: + The pagination chunk. + """ + + user_id = requester.user.to_string() + + # TODO Properly handle a user leaving a room. + (_, member_event_id) = await self._auth.check_user_in_room_or_world_readable( + room_id, requester, allow_departed_users=True + ) + + # Note that ignored users are not passed into get_relations_for_event + # below. Ignored users are handled in filter_events_for_client (and by + # not passing them in here we should get a better cache hit rate). + thread_roots, next_batch = await self._main_store.get_threads( + room_id=room_id, limit=limit, from_token=from_token + ) + + events = await self._main_store.get_events_as_list(thread_roots) + + if include == ThreadsListInclude.participated: + # Pre-seed thread participation with whether the requester sent the event. + participated = {event.event_id: event.sender == user_id for event in events} + # For events the requester did not send, check the database for whether + # the requester sent a threaded reply. + participated.update( + await self._main_store.get_threads_participated( + [eid for eid, p in participated.items() if not p], + user_id, + ) + ) + + # Limit the returned threads to those the user has participated in. + events = [event for event in events if participated[event.event_id]] + + events = await filter_events_for_client( + self._storage_controllers, + user_id, + events, + is_peeking=(member_event_id is None), + ) + + aggregations = await self.get_bundled_aggregations( + events, requester.user.to_string() + ) + + now = self._clock.time_msec() + serialized_events = self._event_serializer.serialize_events( + events, now, bundle_aggregations=aggregations + ) + + return_value: JsonDict = {"chunk": serialized_events} + + if next_batch: + return_value["next_batch"] = str(next_batch) + + return return_value diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index b31ce5a0d3..d1aa1947a5 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -13,12 +13,15 @@ # limitations under the License. import logging +import re from typing import TYPE_CHECKING, Optional, Tuple +from synapse.handlers.relations import ThreadsListInclude from synapse.http.server import HttpServer -from synapse.http.servlet import RestServlet +from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.client._base import client_patterns +from synapse.storage.databases.main.relations import ThreadsNextBatch from synapse.streams.config import PaginationConfig from synapse.types import JsonDict @@ -78,5 +81,50 @@ class RelationPaginationServlet(RestServlet): return 200, result +class ThreadsServlet(RestServlet): + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/org.matrix.msc3856/rooms/(?P[^/]*)/threads" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastores().main + self._relations_handler = hs.get_relations_handler() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + + limit = parse_integer(request, "limit", default=5) + from_token_str = parse_string(request, "from") + include = parse_string( + request, + "include", + default=ThreadsListInclude.all.value, + allowed_values=[v.value for v in ThreadsListInclude], + ) + + # Return the relations + from_token = None + if from_token_str: + from_token = ThreadsNextBatch.from_string(from_token_str) + + result = await self._relations_handler.get_threads( + requester=requester, + room_id=room_id, + include=ThreadsListInclude(include), + limit=limit, + from_token=from_token, + ) + + return 200, result + + def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RelationPaginationServlet(hs).register(http_server) + if hs.config.experimental.msc3856_enabled: + ThreadsServlet(hs).register(http_server) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index a9f25a5904..0ce3156c9c 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -259,6 +259,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,)) self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,)) self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,)) + self._attempt_to_invalidate_cache("get_threads", (room_id,)) async def invalidate_cache_and_stream( self, cache_name: str, keys: Tuple[Any, ...] diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 060fe71454..6698cbf664 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -35,7 +35,7 @@ import attr from prometheus_client import Counter import synapse.metrics -from synapse.api.constants import EventContentFields, EventTypes +from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event @@ -1616,7 +1616,7 @@ class PersistEventsStore: ) # Remove from relations table. - self._handle_redact_relations(txn, event.redacts) + self._handle_redact_relations(txn, event.room_id, event.redacts) # Update the event_forward_extremities, event_backward_extremities and # event_edges tables. @@ -1866,6 +1866,34 @@ class PersistEventsStore: }, ) + if relation.rel_type == RelationTypes.THREAD: + # Upsert into the threads table, but only overwrite the value if the + # new event is of a later topological order OR if the topological + # ordering is equal, but the stream ordering is later. + sql = """ + INSERT INTO threads (room_id, thread_id, latest_event_id, topological_ordering, stream_ordering) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (room_id, thread_id) + DO UPDATE SET + latest_event_id = excluded.latest_event_id, + topological_ordering = excluded.topological_ordering, + stream_ordering = excluded.stream_ordering + WHERE + threads.topological_ordering <= excluded.topological_ordering AND + threads.stream_ordering < excluded.stream_ordering + """ + + txn.execute( + sql, + ( + event.room_id, + relation.parent_id, + event.event_id, + event.depth, + event.internal_metadata.stream_ordering, + ), + ) + def _handle_insertion_event( self, txn: LoggingTransaction, event: EventBase ) -> None: @@ -1989,13 +2017,14 @@ class PersistEventsStore: txn.execute(sql, (batch_id,)) def _handle_redact_relations( - self, txn: LoggingTransaction, redacted_event_id: str + self, txn: LoggingTransaction, room_id: str, redacted_event_id: str ) -> None: """Handles receiving a redaction and checking whether the redacted event has any relations which must be removed from the database. Args: txn + room_id: The room ID of the event that was redacted. redacted_event_id: The event that was redacted. """ @@ -2024,6 +2053,9 @@ class PersistEventsStore: self.store._invalidate_cache_and_stream( txn, self.store.get_thread_participated, (redacted_relates_to,) ) + self.store._invalidate_cache_and_stream( + txn, self.store.get_threads, (room_id,) + ) self.db_pool.simple_delete_txn( txn, table="event_relations", keyvalues={"event_id": redacted_event_id} diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index e7fbf950e6..ac9b96ab44 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -14,6 +14,7 @@ import logging from typing import ( + TYPE_CHECKING, Collection, Dict, FrozenSet, @@ -29,17 +30,46 @@ from typing import ( import attr from synapse.api.constants import MAIN_TIMELINE, RelationTypes +from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.storage._base import SQLBaseStore -from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, + make_in_list_sql_clause, +) from synapse.storage.databases.main.stream import generate_pagination_where_clause from synapse.storage.engines import PostgresEngine from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken from synapse.util.caches.descriptors import cached, cachedList +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThreadsNextBatch: + topological_ordering: int + stream_ordering: int + + def __str__(self) -> str: + return f"{self.topological_ordering}_{self.stream_ordering}" + + @classmethod + def from_string(cls, string: str) -> "ThreadsNextBatch": + """ + Creates a ThreadsNextBatch from its textual representation. + """ + try: + keys = (int(s) for s in string.split("_")) + return cls(*keys) + except Exception: + raise SynapseError(400, "Invalid threads token") + + @attr.s(slots=True, frozen=True, auto_attribs=True) class _RelatedEvent: """ @@ -56,6 +86,76 @@ class _RelatedEvent: class RelationsWorkerStore(SQLBaseStore): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_update_handler( + "threads_backfill", self._backfill_threads + ) + + async def _backfill_threads(self, progress: JsonDict, batch_size: int) -> int: + """Backfill the threads table.""" + + def threads_backfill_txn(txn: LoggingTransaction) -> int: + last_thread_id = progress.get("last_thread_id", "") + + # Get the latest event in each thread by topo ordering / stream ordering. + # + # Note that the MAX(event_id) is needed to abide by the rules of group by, + # but doesn't actually do anything since there should only be a single event + # ID per topo/stream ordering pair. + sql = f""" + SELECT room_id, relates_to_id, MAX(topological_ordering), MAX(stream_ordering), MAX(event_id) + FROM event_relations + INNER JOIN events USING (event_id) + WHERE + relates_to_id > ? AND + relation_type = '{RelationTypes.THREAD}' + GROUP BY room_id, relates_to_id + ORDER BY relates_to_id + LIMIT ? + """ + txn.execute(sql, (last_thread_id, batch_size)) + + # No more rows to process. + rows = txn.fetchall() + if not rows: + return 0 + + # Insert the rows into the threads table. If a matching thread already exists, + # assume it is from a newer event. + sql = """ + INSERT INTO threads (room_id, thread_id, topological_ordering, stream_ordering, latest_event_id) + VALUES %s + ON CONFLICT (room_id, thread_id) + DO NOTHING + """ + if isinstance(txn.database_engine, PostgresEngine): + txn.execute_values(sql % ("?",), rows, fetch=False) + else: + txn.execute_batch(sql % ("?, ?, ?, ?, ?",), rows) + + # Mark the progress. + self.db_pool.updates._background_update_progress_txn( + txn, "threads_backfill", {"last_thread_id": rows[-1][1]} + ) + + return txn.rowcount + + result = await self.db_pool.runInteraction( + "threads_backfill", threads_backfill_txn + ) + + if not result: + await self.db_pool.updates._end_background_update("threads_backfill") + + return result + @cached(uncached_args=("event",), tree=True) async def get_relations_for_event( self, @@ -776,6 +876,70 @@ class RelationsWorkerStore(SQLBaseStore): "get_if_user_has_annotated_event", _get_if_user_has_annotated_event ) + @cached(tree=True) + async def get_threads( + self, + room_id: str, + limit: int = 5, + from_token: Optional[ThreadsNextBatch] = None, + ) -> Tuple[List[str], Optional[ThreadsNextBatch]]: + """Get a list of thread IDs, ordered by topological ordering of their + latest reply. + + Args: + room_id: The room the event belongs to. + limit: Only fetch the most recent `limit` threads. + from_token: Fetch rows from a previous next_batch, or from the start if None. + + Returns: + A tuple of: + A list of thread root event IDs. + + The next_batch, if one exists. + """ + # Generate the pagination clause, if necessary. + # + # Find any threads where the latest reply is equal / before the last + # thread's topo ordering and earlier in stream ordering. + pagination_clause = "" + pagination_args: tuple = () + if from_token: + pagination_clause = "AND topological_ordering <= ? AND stream_ordering < ?" + pagination_args = ( + from_token.topological_ordering, + from_token.stream_ordering, + ) + + sql = f""" + SELECT thread_id, topological_ordering, stream_ordering + FROM threads + WHERE + room_id = ? + {pagination_clause} + ORDER BY topological_ordering DESC, stream_ordering DESC + LIMIT ? + """ + + def _get_threads_txn( + txn: LoggingTransaction, + ) -> Tuple[List[str], Optional[ThreadsNextBatch]]: + txn.execute(sql, (room_id, *pagination_args, limit + 1)) + + rows = cast(List[Tuple[str, int, int]], txn.fetchall()) + thread_ids = [r[0] for r in rows] + + # If there are more events, generate the next pagination key from the + # last thread which will be returned. + next_token = None + if len(thread_ids) > limit: + last_topo_id = rows[-2][1] + last_stream_id = rows[-2][2] + next_token = ThreadsNextBatch(last_topo_id, last_stream_id) + + return thread_ids[:limit], next_token + + return await self.db_pool.runInteraction("get_threads", _get_threads_txn) + @cached() async def get_thread_id(self, event_id: str) -> str: """ diff --git a/synapse/storage/schema/main/delta/73/09threads_table.sql b/synapse/storage/schema/main/delta/73/09threads_table.sql new file mode 100644 index 0000000000..aa7c5e9a2e --- /dev/null +++ b/synapse/storage/schema/main/delta/73/09threads_table.sql @@ -0,0 +1,30 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE threads ( + room_id TEXT NOT NULL, + -- The event ID of the root event in the thread. + thread_id TEXT NOT NULL, + -- The latest event ID and corresponding topo / stream ordering. + latest_event_id TEXT NOT NULL, + topological_ordering BIGINT NOT NULL, + stream_ordering BIGINT NOT NULL, + CONSTRAINT threads_uniqueness UNIQUE (room_id, thread_id) +); + +CREATE INDEX threads_ordering_idx ON threads(room_id, topological_ordering, stream_ordering); + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7309, 'threads_backfill', '{}'); diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 988cdb746d..d595295e2c 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -1707,3 +1707,154 @@ class RelationRedactionTestCase(BaseRelationsTestCase): relations[RelationTypes.THREAD]["latest_event"]["event_id"], related_event_id, ) + + +class ThreadsTestCase(BaseRelationsTestCase): + @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) + def test_threads(self) -> None: + """Create threads and ensure the ordering is due to their latest event.""" + # Create 2 threads. + thread_1 = self.parent_id + res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token) + thread_2 = res["event_id"] + + self._send_relation(RelationTypes.THREAD, "m.room.test") + self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2) + + # Request the threads in the room. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_2, thread_1]) + + # Update the first thread, the ordering should swap. + self._send_relation(RelationTypes.THREAD, "m.room.test") + + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_1, thread_2]) + + @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) + def test_pagination(self) -> None: + """Create threads and paginate through them.""" + # Create 2 threads. + thread_1 = self.parent_id + res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token) + thread_2 = res["event_id"] + + self._send_relation(RelationTypes.THREAD, "m.room.test") + self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2) + + # Request the threads in the room. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_2]) + + # Make sure next_batch has something in it that looks like it could be a + # valid token. + next_batch = channel.json_body.get("next_batch") + self.assertIsInstance(next_batch, str, channel.json_body) + + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1&from={next_batch}", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_1], channel.json_body) + + self.assertNotIn("next_batch", channel.json_body, channel.json_body) + + @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) + def test_include(self) -> None: + """Filtering threads to all or participated in should work.""" + # Thread 1 has the user as the root event. + thread_1 = self.parent_id + self._send_relation( + RelationTypes.THREAD, "m.room.test", access_token=self.user2_token + ) + + # Thread 2 has the user replying. + res = self.helper.send(self.room, body="Thread Root!", tok=self.user2_token) + thread_2 = res["event_id"] + self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2) + + # Thread 3 has the user not participating in. + res = self.helper.send(self.room, body="Another thread!", tok=self.user2_token) + thread_3 = res["event_id"] + self._send_relation( + RelationTypes.THREAD, + "m.room.test", + access_token=self.user2_token, + parent_id=thread_3, + ) + + # All threads in the room. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual( + thread_roots, [thread_3, thread_2, thread_1], channel.json_body + ) + + # Only participated threads. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?include=participated", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_2, thread_1], channel.json_body) + + @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) + def test_ignored_user(self) -> None: + """Events from ignored users should be ignored.""" + # Thread 1 has a reply from an ignored user. + thread_1 = self.parent_id + self._send_relation( + RelationTypes.THREAD, "m.room.test", access_token=self.user2_token + ) + + # Thread 2 is created by an ignored user. + res = self.helper.send(self.room, body="Thread Root!", tok=self.user2_token) + thread_2 = res["event_id"] + self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2) + + # Ignore user2. + self.get_success( + self.store.add_account_data_for_user( + self.user_id, + AccountDataTypes.IGNORED_USER_LIST, + {"ignored_users": {self.user2_id: {}}}, + ) + ) + + # Only thread 1 is returned. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] + self.assertEqual(thread_roots, [thread_1], channel.json_body) -- cgit 1.5.1 From c3a4780080a5bcb04132283c0f32f7452655792a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 18 Oct 2022 12:33:18 +0100 Subject: When restarting a partial join resync, prioritise the server which actioned a partial join (#14126) --- changelog.d/14126.misc | 1 + synapse/handlers/device.py | 5 +- synapse/handlers/federation.py | 57 +++++++++++++--------- synapse/storage/database.py | 2 +- synapse/storage/databases/main/room.py | 43 +++++++++++++--- .../delta/73/09partial_joined_via_destination.sql | 18 +++++++ 6 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 changelog.d/14126.misc create mode 100644 synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/14126.misc b/changelog.d/14126.misc new file mode 100644 index 0000000000..30b3482fbd --- /dev/null +++ b/changelog.d/14126.misc @@ -0,0 +1 @@ +Faster joins: prioritise the server we joined by when restarting a partial join resync. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index f9cc5bddbc..c597639a7f 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -937,7 +937,10 @@ class DeviceListUpdater: # Check if we are partially joining any rooms. If so we need to store # all device list updates so that we can handle them correctly once we # know who is in the room. - partial_rooms = await self.store.get_partial_state_rooms_and_servers() + # TODO(faster joins): this fetches and processes a bunch of data that we don't + # use. Could be replaced by a tighter query e.g. + # SELECT EXISTS(SELECT 1 FROM partial_state_rooms) + partial_rooms = await self.store.get_partial_state_room_resync_info() if partial_rooms: await self.store.add_remote_device_list_to_pending( user_id, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5f7e0a1f79..ccc045d36f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -632,6 +632,7 @@ class FederationHandler: room_id=room_id, servers=ret.servers_in_room, device_lists_stream_id=self.store.get_device_stream_token(), + joined_via=origin, ) try: @@ -1615,13 +1616,13 @@ class FederationHandler: """Resumes resyncing of all partial-state rooms after a restart.""" assert not self.config.worker.worker_app - partial_state_rooms = await self.store.get_partial_state_rooms_and_servers() - for room_id, servers_in_room in partial_state_rooms.items(): + partial_state_rooms = await self.store.get_partial_state_room_resync_info() + for room_id, resync_info in partial_state_rooms.items(): run_as_background_process( desc="sync_partial_state_room", func=self._sync_partial_state_room, - initial_destination=None, - other_destinations=servers_in_room, + initial_destination=resync_info.joined_via, + other_destinations=resync_info.servers_in_room, room_id=room_id, ) @@ -1650,28 +1651,12 @@ class FederationHandler: # really leave, that might mean we have difficulty getting the room state over # federation. # https://github.com/matrix-org/synapse/issues/12802 - # - # TODO(faster_joins): we need some way of prioritising which homeservers in - # `other_destinations` to try first, otherwise we'll spend ages trying dead - # homeservers for large rooms. - # https://github.com/matrix-org/synapse/issues/12999 - - if initial_destination is None and len(other_destinations) == 0: - raise ValueError( - f"Cannot resync state of {room_id}: no destinations provided" - ) # Make an infinite iterator of destinations to try. Once we find a working # destination, we'll stick with it until it flakes. - destinations: Collection[str] - if initial_destination is not None: - # Move `initial_destination` to the front of the list. - destinations = list(other_destinations) - if initial_destination in destinations: - destinations.remove(initial_destination) - destinations = [initial_destination] + destinations - else: - destinations = other_destinations + destinations = _prioritise_destinations_for_partial_state_resync( + initial_destination, other_destinations, room_id + ) destination_iter = itertools.cycle(destinations) # `destination` is the current remote homeserver we're pulling from. @@ -1769,3 +1754,29 @@ class FederationHandler: room_id, destination, ) + + +def _prioritise_destinations_for_partial_state_resync( + initial_destination: Optional[str], + other_destinations: Collection[str], + room_id: str, +) -> Collection[str]: + """Work out the order in which we should ask servers to resync events. + + If an `initial_destination` is given, it takes top priority. Otherwise + all servers are treated equally. + + :raises ValueError: if no destination is provided at all. + """ + if initial_destination is None and len(other_destinations) == 0: + raise ValueError(f"Cannot resync state of {room_id}: no destinations provided") + + if initial_destination is None: + return other_destinations + + # Move `initial_destination` to the front of the list. + destinations = list(other_destinations) + if initial_destination in destinations: + destinations.remove(initial_destination) + destinations = [initial_destination] + destinations + return destinations diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 7bb21f8f81..4717c9728a 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1658,7 +1658,7 @@ class DatabasePool: table: string giving the table name keyvalues: dict of column names and values to select the row with retcol: string giving the name of the column to return - allow_none: If true, return None instead of failing if the SELECT + allow_none: If true, return None instead of raising StoreError if the SELECT statement returns no rows desc: description of the transaction, for logging and metrics """ diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index e41c99027a..7d97f8f60e 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -97,6 +97,12 @@ class RoomSortOrder(Enum): STATE_EVENTS = "state_events" +@attr.s(slots=True, frozen=True, auto_attribs=True) +class PartialStateResyncInfo: + joined_via: Optional[str] + servers_in_room: List[str] = attr.ib(factory=list) + + class RoomWorkerStore(CacheInvalidationWorkerStore): def __init__( self, @@ -1160,17 +1166,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): desc="get_partial_state_servers_at_join", ) - async def get_partial_state_rooms_and_servers( + async def get_partial_state_room_resync_info( self, - ) -> Mapping[str, Collection[str]]: - """Get all rooms containing events with partial state, and the servers known - to be in the room. + ) -> Mapping[str, PartialStateResyncInfo]: + """Get all rooms containing events with partial state, and the information + needed to restart a "resync" of those rooms. Returns: A dictionary of rooms with partial state, with room IDs as keys and lists of servers in rooms as values. """ - room_servers: Dict[str, List[str]] = {} + room_servers: Dict[str, PartialStateResyncInfo] = {} + + rows = await self.db_pool.simple_select_list( + table="partial_state_rooms", + keyvalues={}, + retcols=("room_id", "joined_via"), + desc="get_server_which_served_partial_join", + ) + + for row in rows: + room_id = row["room_id"] + joined_via = row["joined_via"] + room_servers[room_id] = PartialStateResyncInfo(joined_via=joined_via) rows = await self.db_pool.simple_select_list( "partial_state_rooms_servers", @@ -1182,7 +1200,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): for row in rows: room_id = row["room_id"] server_name = row["server_name"] - room_servers.setdefault(room_id, []).append(server_name) + entry = room_servers.get(room_id) + if entry is None: + # There is a foreign key constraint which enforces that every room_id in + # partial_state_rooms_servers appears in partial_state_rooms. So we + # expect `entry` to be non-null. (This reasoning fails if we've + # partial-joined between the two SELECTs, but this is unlikely to happen + # in practice.) + continue + entry.servers_in_room.append(server_name) return room_servers @@ -1827,6 +1853,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): room_id: str, servers: Collection[str], device_lists_stream_id: int, + joined_via: str, ) -> None: """Mark the given room as containing events with partial state. @@ -1842,6 +1869,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): servers: other servers known to be in the room device_lists_stream_id: the device_lists stream ID at the time when we first joined the room. + joined_via: the server name we requested a partial join from. """ await self.db_pool.runInteraction( "store_partial_state_room", @@ -1849,6 +1877,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): room_id, servers, device_lists_stream_id, + joined_via, ) def _store_partial_state_room_txn( @@ -1857,6 +1886,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): room_id: str, servers: Collection[str], device_lists_stream_id: int, + joined_via: str, ) -> None: DatabasePool.simple_insert_txn( txn, @@ -1866,6 +1896,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): "device_lists_stream_id": device_lists_stream_id, # To be updated later once the join event is persisted. "join_event_id": None, + "joined_via": joined_via, }, ) DatabasePool.simple_insert_many_txn( diff --git a/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql b/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql new file mode 100644 index 0000000000..066d602b18 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql @@ -0,0 +1,18 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- When we resync partial state, we prioritise doing so using the server we +-- partial-joined from. To do this we need to record which server that was! +ALTER TABLE partial_state_rooms ADD COLUMN joined_via TEXT; -- cgit 1.5.1 From dbf18f514ea5d2539ba3148049eae5a6793f1d60 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 18 Oct 2022 10:55:41 -0400 Subject: Update the thread_id right before use (in case the bg update hasn't finished) (#14222) This avoids running a forced-update of a null thread_id rows. An index is added (in the background) to hopefully make this easier in the future. --- changelog.d/14222.feature | 1 + .../storage/databases/main/event_push_actions.py | 103 +++++++++++++++++++++ .../delta/73/06thread_notifications_backfill.sql | 29 ------ .../73/06thread_notifications_thread_id_idx.sql | 23 +++++ .../07thread_notifications_not_null.sql.postgres | 19 ---- .../73/07thread_notifications_not_null.sql.sqlite | 101 -------------------- 6 files changed, 127 insertions(+), 149 deletions(-) create mode 100644 changelog.d/14222.feature delete mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite (limited to 'synapse/storage/schema') diff --git a/changelog.d/14222.feature b/changelog.d/14222.feature new file mode 100644 index 0000000000..5d0ae16e13 --- /dev/null +++ b/changelog.d/14222.feature @@ -0,0 +1 @@ +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f070e6e88a..b283ab0f9c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -294,6 +294,44 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self._background_backfill_thread_id, ) + # Indexes which will be used to quickly make the thread_id column non-null. + self.db_pool.updates.register_background_index_update( + "event_push_actions_thread_id_null", + index_name="event_push_actions_thread_id_null", + table="event_push_actions", + columns=["thread_id"], + where_clause="thread_id IS NULL", + ) + self.db_pool.updates.register_background_index_update( + "event_push_summary_thread_id_null", + index_name="event_push_summary_thread_id_null", + table="event_push_summary", + columns=["thread_id"], + where_clause="thread_id IS NULL", + ) + + # Check ASAP (and then later, every 1s) to see if we have finished + # background updates the event_push_actions and event_push_summary tables. + self._clock.call_later(0.0, self._check_event_push_backfill_thread_id) + self._event_push_backfill_thread_id_done = False + + @wrap_as_background_process("check_event_push_backfill_thread_id") + async def _check_event_push_backfill_thread_id(self) -> None: + """ + Has thread_id finished backfilling? + + If not, we need to just-in-time update it so the queries work. + """ + done = await self.db_pool.updates.has_completed_background_update( + "event_push_backfill_thread_id" + ) + + if done: + self._event_push_backfill_thread_id_done = True + else: + # Reschedule to run. + self._clock.call_later(15.0, self._check_event_push_backfill_thread_id) + async def _background_backfill_thread_id( self, progress: JsonDict, batch_size: int ) -> int: @@ -526,6 +564,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), ) + # First ensure that the existing rows have an updated thread_id field. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + # First we pull the counts from the summary table. # # We check that `last_receipt_stream_ordering` matches the stream ordering of the @@ -1341,6 +1398,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (room_id, user_id, stream_ordering, *thread_args), ) + # First ensure that the existing rows have an updated thread_id field. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + # Fetch the notification counts between the stream ordering of the # latest receipt and what was previously summarised. unread_counts = self._get_notif_unread_count_for_user_room( @@ -1475,6 +1551,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas rotate_to_stream_ordering: The new maximum event stream ordering to summarise. """ + # Ensure that any new actions have an updated thread_id. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL + """, + (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering), + ) + + # XXX Do we need to update summaries here too? + # Calculate the new counts that should be upserted into event_push_summary sql = """ SELECT user_id, room_id, thread_id, @@ -1537,6 +1626,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications, handling %d rows", len(summaries)) + # Ensure that any updated threads have the proper thread_id. + if not self._event_push_backfill_thread_id_done: + txn.execute_batch( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + [ + (MAIN_TIMELINE, room_id, user_id) + for user_id, room_id, _ in summaries + ], + ) + self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql deleted file mode 100644 index 0ffde9bbeb..0000000000 --- a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- Forces the background updates from 06thread_notifications.sql to run in the --- foreground as code will now require those to be "done". - -DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id'; - --- Overwrite any null thread_id columns. -UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL; -UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL; -UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL; - --- Do not run the event_push_summary_unique_index job if it is pending; the --- thread_id field will be made required. -DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index'; -DROP INDEX IF EXISTS event_push_summary_unique_index; diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql new file mode 100644 index 0000000000..8b3c636594 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Allow there to be multiple summaries per user/room. +DROP INDEX IF EXISTS event_push_summary_unique_index; + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7306, 'event_push_actions_thread_id_null', '{}', 'event_push_backfill_thread_id'); + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7306, 'event_push_summary_thread_id_null', '{}', 'event_push_backfill_thread_id'); diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres deleted file mode 100644 index 33674f8c62..0000000000 --- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- The columns can now be made non-nullable. -ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL; -ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL; -ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite deleted file mode 100644 index 5322ad77a4..0000000000 --- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- SQLite doesn't support modifying columns to an existing table, so it must --- be recreated. - --- Create the new tables. -CREATE TABLE event_push_actions_staging_new ( - event_id TEXT NOT NULL, - user_id TEXT NOT NULL, - actions TEXT NOT NULL, - notif SMALLINT NOT NULL, - highlight SMALLINT NOT NULL, - unread SMALLINT, - thread_id TEXT NOT NULL, - inserted_ts BIGINT -); - -CREATE TABLE event_push_actions_new ( - room_id TEXT NOT NULL, - event_id TEXT NOT NULL, - user_id TEXT NOT NULL, - profile_tag VARCHAR(32), - actions TEXT NOT NULL, - topological_ordering BIGINT, - stream_ordering BIGINT, - notif SMALLINT, - highlight SMALLINT, - unread SMALLINT, - thread_id TEXT NOT NULL, - CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) -); - -CREATE TABLE event_push_summary_new ( - user_id TEXT NOT NULL, - room_id TEXT NOT NULL, - notif_count BIGINT NOT NULL, - stream_ordering BIGINT NOT NULL, - unread_count BIGINT, - last_receipt_stream_ordering BIGINT, - thread_id TEXT NOT NULL -); - --- Swap the indexes. -DROP INDEX IF EXISTS event_push_actions_staging_id; -CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id); - -DROP INDEX IF EXISTS event_push_actions_room_id_user_id; -DROP INDEX IF EXISTS event_push_actions_rm_tokens; -DROP INDEX IF EXISTS event_push_actions_stream_ordering; -DROP INDEX IF EXISTS event_push_actions_u_highlight; -DROP INDEX IF EXISTS event_push_actions_highlights_index; -CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id); -CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering ); -CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id ); -CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering); -CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering); - --- Copy the data. -INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts) - SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts - FROM event_push_actions_staging; - -INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id) - SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id - FROM event_push_actions; - -INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id) - SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id - FROM event_push_summary; - --- Drop the old tables. -DROP TABLE event_push_actions_staging; -DROP TABLE event_push_actions; -DROP TABLE event_push_summary; - --- Rename the tables. -ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging; -ALTER TABLE event_push_actions_new RENAME TO event_push_actions; -ALTER TABLE event_push_summary_new RENAME TO event_push_summary; - --- Re-run background updates from 72/02event_push_actions_index.sql and --- 72/06thread_notifications.sql. -INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (7307, 'event_push_summary_unique_index2', '{}') - ON CONFLICT (update_name) DO NOTHING; -INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (7307, 'event_push_actions_stream_highlight_index', '{}') - ON CONFLICT (update_name) DO NOTHING; -- cgit 1.5.1 From d902181de98399d90c46c4e4e2cf631064757941 Mon Sep 17 00:00:00 2001 From: James Salter Date: Tue, 25 Oct 2022 19:05:22 +0100 Subject: Unified search query syntax using the full-text search capabilities of the underlying DB. (#11635) Support a unified search query syntax which leverages more of the full-text search of each database supported by Synapse. Supports, with the same syntax across Postgresql 11+ and Sqlite: - quoted "search terms" - `AND`, `OR`, `-` (negation) operators - Matching words based on their stem, e.g. searches for "dog" matches documents containing "dogs". This is achieved by - If on postgresql 11+, pass the user input to `websearch_to_tsquery` - If on sqlite, manually parse the query and transform it into the sqlite-specific query syntax. Note that postgresql 10, which is close to end-of-life, falls back to using `phraseto_tsquery`, which only supports a subset of the features. Multiple terms separated by a space are implicitly ANDed. Note that: 1. There is no escaping of full-text syntax that might be supported by the database; e.g. `NOT`, `NEAR`, `*` in sqlite. This runs the risk that people might discover this as accidental functionality and depend on something we don't guarantee. 2. English text is assumed for stemming. To support other languages, either the target language needs to be known at the time of indexing the message (via room metadata, or otherwise), or a separate index for each language supported could be created. Sqlite docs: https://www.sqlite.org/fts3.html#full_text_index_queries Postgres docs: https://www.postgresql.org/docs/11/textsearch-controls.html --- changelog.d/11635.feature | 1 + synapse/storage/databases/main/search.py | 197 +++++++++++++++---- synapse/storage/engines/postgres.py | 16 ++ .../delta/73/10_update_sqlite_fts4_tokenizer.py | 62 ++++++ tests/storage/test_room_search.py | 213 +++++++++++++++++++++ 5 files changed, 454 insertions(+), 35 deletions(-) create mode 100644 changelog.d/11635.feature create mode 100644 synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py (limited to 'synapse/storage/schema') diff --git a/changelog.d/11635.feature b/changelog.d/11635.feature new file mode 100644 index 0000000000..94c8a83212 --- /dev/null +++ b/changelog.d/11635.feature @@ -0,0 +1 @@ +Allow use of postgres and sqllite full-text search operators in search queries. \ No newline at end of file diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 1b79acf955..a89fc54c2c 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -11,10 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import enum import logging import re -from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple +from collections import deque +from dataclasses import dataclass +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Iterable, + List, + Optional, + Set, + Tuple, + Union, +) import attr @@ -27,7 +39,7 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import JsonDict if TYPE_CHECKING: @@ -421,8 +433,6 @@ class SearchStore(SearchBackgroundUpdateStore): """ clauses = [] - search_query = _parse_query(self.database_engine, search_term) - args: List[Any] = [] # Make sure we don't explode because the person is in too many rooms. @@ -444,20 +454,24 @@ class SearchStore(SearchBackgroundUpdateStore): count_clauses = clauses if isinstance(self.database_engine, PostgresEngine): + search_query = search_term + tsquery_func = self.database_engine.tsquery_func sql = ( - "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank," + f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank," " room_id, event_id" " FROM event_search" - " WHERE vector @@ to_tsquery('english', ?)" + f" WHERE vector @@ {tsquery_func}('english', ?)" ) args = [search_query, search_query] + args count_sql = ( "SELECT room_id, count(*) as count FROM event_search" - " WHERE vector @@ to_tsquery('english', ?)" + f" WHERE vector @@ {tsquery_func}('english', ?)" ) count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): + search_query = _parse_query_for_sqlite(search_term) + sql = ( "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" " FROM event_search" @@ -469,7 +483,7 @@ class SearchStore(SearchBackgroundUpdateStore): "SELECT room_id, count(*) as count FROM event_search" " WHERE value MATCH ?" ) - count_args = [search_term] + count_args + count_args = [search_query] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -501,7 +515,9 @@ class SearchStore(SearchBackgroundUpdateStore): highlights = None if isinstance(self.database_engine, PostgresEngine): - highlights = await self._find_highlights_in_postgres(search_query, events) + highlights = await self._find_highlights_in_postgres( + search_query, events, tsquery_func + ) count_sql += " GROUP BY room_id" @@ -510,7 +526,6 @@ class SearchStore(SearchBackgroundUpdateStore): ) count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) - return { "results": [ {"event": event_map[r["event_id"]], "rank": r["rank"]} @@ -542,9 +557,6 @@ class SearchStore(SearchBackgroundUpdateStore): Each match as a dictionary. """ clauses = [] - - search_query = _parse_query(self.database_engine, search_term) - args: List[Any] = [] # Make sure we don't explode because the person is in too many rooms. @@ -582,20 +594,23 @@ class SearchStore(SearchBackgroundUpdateStore): args.extend([origin_server_ts, origin_server_ts, stream]) if isinstance(self.database_engine, PostgresEngine): + search_query = search_term + tsquery_func = self.database_engine.tsquery_func sql = ( - "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank," + f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank," " origin_server_ts, stream_ordering, room_id, event_id" " FROM event_search" - " WHERE vector @@ to_tsquery('english', ?) AND " + f" WHERE vector @@ {tsquery_func}('english', ?) AND " ) args = [search_query, search_query] + args count_sql = ( "SELECT room_id, count(*) as count FROM event_search" - " WHERE vector @@ to_tsquery('english', ?) AND " + f" WHERE vector @@ {tsquery_func}('english', ?) AND " ) count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): + # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin # @@ -614,13 +629,14 @@ class SearchStore(SearchBackgroundUpdateStore): " CROSS JOIN events USING (event_id)" " WHERE " ) + search_query = _parse_query_for_sqlite(search_term) args = [search_query] + args count_sql = ( "SELECT room_id, count(*) as count FROM event_search" " WHERE value MATCH ? AND " ) - count_args = [search_term] + count_args + count_args = [search_query] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -660,7 +676,9 @@ class SearchStore(SearchBackgroundUpdateStore): highlights = None if isinstance(self.database_engine, PostgresEngine): - highlights = await self._find_highlights_in_postgres(search_query, events) + highlights = await self._find_highlights_in_postgres( + search_query, events, tsquery_func + ) count_sql += " GROUP BY room_id" @@ -686,7 +704,7 @@ class SearchStore(SearchBackgroundUpdateStore): } async def _find_highlights_in_postgres( - self, search_query: str, events: List[EventBase] + self, search_query: str, events: List[EventBase], tsquery_func: str ) -> Set[str]: """Given a list of events and a search term, return a list of words that match from the content of the event. @@ -697,6 +715,7 @@ class SearchStore(SearchBackgroundUpdateStore): Args: search_query events: A list of events + tsquery_func: The tsquery_* function to use when making queries Returns: A set of strings. @@ -729,7 +748,7 @@ class SearchStore(SearchBackgroundUpdateStore): while stop_sel in value: stop_sel += ">" - query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % ( + query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % ( _to_postgres_options( { "StartSel": start_sel, @@ -760,20 +779,128 @@ def _to_postgres_options(options_dict: JsonDict) -> str: return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),) -def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str: - """Takes a plain unicode string from the user and converts it into a form - that can be passed to database. - We use this so that we can add prefix matching, which isn't something - that is supported by default. +@dataclass +class Phrase: + phrase: List[str] + + +class SearchToken(enum.Enum): + Not = enum.auto() + Or = enum.auto() + And = enum.auto() + + +Token = Union[str, Phrase, SearchToken] +TokenList = List[Token] + + +def _is_stop_word(word: str) -> bool: + # TODO Pull these out of the dictionary: + # https://github.com/postgres/postgres/blob/master/src/backend/snowball/stopwords/english.stop + return word in {"the", "a", "you", "me", "and", "but"} + + +def _tokenize_query(query: str) -> TokenList: + """ + Convert the user-supplied `query` into a TokenList, which can be translated into + some DB-specific syntax. + + The following constructs are supported: + + - phrase queries using "double quotes" + - case-insensitive `or` and `and` operators + - negation of a keyword via unary `-` + - unary hyphen to denote NOT e.g. 'include -exclude' + + The following differs from websearch_to_tsquery: + + - Stop words are not removed. + - Unclosed phrases are treated differently. + + """ + tokens: TokenList = [] + + # Find phrases. + in_phrase = False + parts = deque(query.split('"')) + for i, part in enumerate(parts): + # The contents inside double quotes is treated as a phrase, a trailing + # double quote is not implied. + in_phrase = bool(i % 2) and i != (len(parts) - 1) + + # Pull out the individual words, discarding any non-word characters. + words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE)) + + # Phrases have simplified handling of words. + if in_phrase: + # Skip stop words. + phrase = [word for word in words if not _is_stop_word(word)] + + # Consecutive words are implicitly ANDed together. + if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or): + tokens.append(SearchToken.And) + + # Add the phrase. + tokens.append(Phrase(phrase)) + continue + + # Otherwise, not in a phrase. + while words: + word = words.popleft() + + if word.startswith("-"): + tokens.append(SearchToken.Not) + + # If there's more word, put it back to be processed again. + word = word[1:] + if word: + words.appendleft(word) + elif word.lower() == "or": + tokens.append(SearchToken.Or) + else: + # Skip stop words. + if _is_stop_word(word): + continue + + # Consecutive words are implicitly ANDed together. + if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or): + tokens.append(SearchToken.And) + + # Add the search term. + tokens.append(word) + + return tokens + + +def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: + """ + Convert the list of tokens to a string suitable for passing to sqlite's MATCH. + Assume sqlite was compiled with enhanced query syntax. + + Ref: https://www.sqlite.org/fts3.html#full_text_index_queries """ + match_query = [] + for token in tokens: + if isinstance(token, str): + match_query.append(token) + elif isinstance(token, Phrase): + match_query.append('"' + " ".join(token.phrase) + '"') + elif token == SearchToken.Not: + # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search + # term has already been added before this. + match_query.append(" NOT ") + elif token == SearchToken.Or: + match_query.append(" OR ") + elif token == SearchToken.And: + match_query.append(" AND ") + else: + raise ValueError(f"unknown token {token}") + + return "".join(match_query) - # Pull out the individual words, discarding any non-word characters. - results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - if isinstance(database_engine, PostgresEngine): - return " & ".join(result + ":*" for result in results) - elif isinstance(database_engine, Sqlite3Engine): - return " & ".join(result + "*" for result in results) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") +def _parse_query_for_sqlite(search_term: str) -> str: + """Takes a plain unicode string from the user and converts it into a form + that can be passed to sqllite's matchinfo(). + """ + return _tokens_to_sqlite_match_query(_tokenize_query(search_term)) diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index d8c0f64d9a..9bf74bbf59 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -170,6 +170,22 @@ class PostgresEngine( """Do we support the `RETURNING` clause in insert/update/delete?""" return True + @property + def tsquery_func(self) -> str: + """ + Selects a tsquery_* func to use. + + Ref: https://www.postgresql.org/docs/current/textsearch-controls.html + + Returns: + The function name. + """ + # Postgres 11 added support for websearch_to_tsquery. + assert self._version is not None + if self._version >= 110000: + return "websearch_to_tsquery" + return "plainto_tsquery" + def is_deadlock(self, error: Exception) -> bool: if isinstance(error, psycopg2.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py new file mode 100644 index 0000000000..3de0a709eb --- /dev/null +++ b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py @@ -0,0 +1,62 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json + +from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine +from synapse.storage.types import Cursor + + +def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None: + """ + Upgrade the event_search table to use the porter tokenizer if it isn't already + + Applies only for sqlite. + """ + if not isinstance(database_engine, Sqlite3Engine): + return + + # Rebuild the table event_search table with tokenize=porter configured. + cur.execute("DROP TABLE event_search") + cur.execute( + """ + CREATE VIRTUAL TABLE event_search + USING fts4 (tokenize=porter, event_id, room_id, sender, key, value ) + """ + ) + + # Re-run the background job to re-populate the event_search table. + cur.execute("SELECT MIN(stream_ordering) FROM events") + row = cur.fetchone() + min_stream_id = row[0] + + # If there are not any events, nothing to do. + if min_stream_id is None: + return + + cur.execute("SELECT MAX(stream_ordering) FROM events") + row = cur.fetchone() + max_stream_id = row[0] + + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + } + progress_json = json.dumps(progress) + + sql = """ + INSERT into background_updates (ordering, update_name, progress_json) + VALUES (?, ?, ?) + """ + + cur.execute(sql, (7310, "event_search", progress_json)) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index e747c6b50e..9ddc19900a 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -12,11 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Tuple, Union +from unittest.case import SkipTest +from unittest.mock import PropertyMock, patch + +from twisted.test.proto_helpers import MemoryReactor + import synapse.rest.admin from synapse.api.constants import EventTypes from synapse.api.errors import StoreError from synapse.rest.client import login, room +from synapse.server import HomeServer +from synapse.storage.databases.main import DataStore +from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query from synapse.storage.engines import PostgresEngine +from synapse.storage.engines.sqlite import Sqlite3Engine +from synapse.util import Clock from tests.unittest import HomeserverTestCase, skip_unless from tests.utils import USE_POSTGRES_FOR_TESTS @@ -187,3 +198,205 @@ class EventSearchInsertionTest(HomeserverTestCase): ), ) self.assertCountEqual(values, ["hi", "2"]) + + +class MessageSearchTest(HomeserverTestCase): + """ + Check message search. + + A powerful way to check the behaviour is to run the following in Postgres >= 11: + + # SELECT websearch_to_tsquery('english', ); + + The result can be compared to the tokenized version for SQLite and Postgres < 11. + + """ + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + + PHRASE = "the quick brown fox jumps over the lazy dog" + + # Each entry is a search query, followed by either a boolean of whether it is + # in the phrase OR a tuple of booleans: whether it matches using websearch + # and using plain search. + COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [ + ("nope", False), + ("brown", True), + ("quick brown", True), + ("brown quick", True), + ("quick \t brown", True), + ("jump", True), + ("brown nope", False), + ('"brown quick"', (False, True)), + ('"jumps over"', True), + ('"quick fox"', (False, True)), + ("nope OR doublenope", False), + ("furphy OR fox", (True, False)), + ("fox -nope", (True, False)), + ("fox -brown", (False, True)), + ('"fox" quick', True), + ('"fox quick', True), + ('"quick brown', True), + ('" quick "', True), + ('" nope"', False), + ] + # TODO Test non-ASCII cases. + + # Case that fail on SQLite. + POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [ + # SQLite treats NOT as a binary operator. + ("- fox", (False, True)), + ("- nope", (True, False)), + ('"-fox quick', (False, True)), + # PostgreSQL skips stop words. + ('"the quick brown"', True), + ('"over lazy"', True), + ] + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + # Register a user and create a room, create some messages + self.register_user("alice", "password") + self.access_token = self.login("alice", "password") + self.room_id = self.helper.create_room_as("alice", tok=self.access_token) + + # Send the phrase as a message and check it was created + response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token) + self.assertIn("event_id", response) + + def test_tokenize_query(self) -> None: + """Test the custom logic to tokenize a user's query.""" + cases = ( + ("brown", ["brown"]), + ("quick brown", ["quick", SearchToken.And, "brown"]), + ("quick \t brown", ["quick", SearchToken.And, "brown"]), + ('"brown quick"', [Phrase(["brown", "quick"])]), + ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]), + ("fox -brown", ["fox", SearchToken.Not, "brown"]), + ("- fox", [SearchToken.Not, "fox"]), + ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]), + # No trailing double quoe. + ('"fox quick', ["fox", SearchToken.And, "quick"]), + ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]), + ('" quick "', [Phrase(["quick"])]), + ( + 'q"uick brow"n', + [ + "q", + SearchToken.And, + Phrase(["uick", "brow"]), + SearchToken.And, + "n", + ], + ), + ( + '-"quick brown"', + [SearchToken.Not, Phrase(["quick", "brown"])], + ), + ) + + for query, expected in cases: + tokenized = _tokenize_query(query) + self.assertEqual( + tokenized, expected, f"{tokenized} != {expected} for {query}" + ) + + def _check_test_cases( + self, + store: DataStore, + cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]], + index=0, + ) -> None: + # Run all the test cases versus search_msgs + for query, expect_to_contain in cases: + if isinstance(expect_to_contain, tuple): + expect_to_contain = expect_to_contain[index] + + result = self.get_success( + store.search_msgs([self.room_id], query, ["content.body"]) + ) + self.assertEquals( + result["count"], + 1 if expect_to_contain else 0, + f"expected '{query}' to match '{self.PHRASE}'" + if expect_to_contain + else f"'{query}' unexpectedly matched '{self.PHRASE}'", + ) + self.assertEquals( + len(result["results"]), + 1 if expect_to_contain else 0, + "results array length should match count", + ) + + # Run them again versus search_rooms + for query, expect_to_contain in cases: + if isinstance(expect_to_contain, tuple): + expect_to_contain = expect_to_contain[index] + + result = self.get_success( + store.search_rooms([self.room_id], query, ["content.body"], 10) + ) + self.assertEquals( + result["count"], + 1 if expect_to_contain else 0, + f"expected '{query}' to match '{self.PHRASE}'" + if expect_to_contain + else f"'{query}' unexpectedly matched '{self.PHRASE}'", + ) + self.assertEquals( + len(result["results"]), + 1 if expect_to_contain else 0, + "results array length should match count", + ) + + def test_postgres_web_search_for_phrase(self): + """ + Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery. + This test is skipped unless the postgres instance supports websearch_to_tsquery. + """ + + store = self.hs.get_datastores().main + if not isinstance(store.database_engine, PostgresEngine): + raise SkipTest("Test only applies when postgres is used as the database") + + if store.database_engine.tsquery_func != "websearch_to_tsquery": + raise SkipTest( + "Test only applies when postgres supporting websearch_to_tsquery is used as the database" + ) + + self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0) + + def test_postgres_non_web_search_for_phrase(self): + """ + Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't + supported by the current postgres version. + """ + + store = self.hs.get_datastores().main + if not isinstance(store.database_engine, PostgresEngine): + raise SkipTest("Test only applies when postgres is used as the database") + + # Patch supports_websearch_to_tsquery to always return False to ensure we're testing the plainto_tsquery path. + with patch( + "synapse.storage.engines.postgres.PostgresEngine.tsquery_func", + new_callable=PropertyMock, + ) as supports_websearch_to_tsquery: + supports_websearch_to_tsquery.return_value = "plainto_tsquery" + self._check_test_cases( + store, self.COMMON_CASES + self.POSTGRES_CASES, index=1 + ) + + def test_sqlite_search(self): + """ + Test sqlite searching for phrases. + """ + store = self.hs.get_datastores().main + if not isinstance(store.database_engine, Sqlite3Engine): + raise SkipTest("Test only applies when sqlite is used as the database") + + self._check_test_cases(store, self.COMMON_CASES, index=0) -- cgit 1.5.1 From 8756d5c87efc5637da55c9e21d2a4eb2369ba693 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Wed, 26 Oct 2022 12:45:41 +0200 Subject: Save login tokens in database (#13844) * Save login tokens in database Signed-off-by: Quentin Gliech * Add upgrade notes * Track login token reuse in a Prometheus metric Signed-off-by: Quentin Gliech --- changelog.d/13844.misc | 1 + docs/upgrade.md | 9 ++ synapse/handlers/auth.py | 64 +++++++-- synapse/module_api/__init__.py | 41 +----- synapse/rest/client/login.py | 3 +- synapse/rest/client/login_token_request.py | 5 +- synapse/storage/databases/main/registration.py | 156 ++++++++++++++++++++- .../schema/main/delta/73/10login_tokens.sql | 35 +++++ synapse/util/macaroons.py | 87 +----------- tests/handlers/test_auth.py | 135 ++++++++++-------- tests/util/test_macaroons.py | 28 ---- 11 files changed, 337 insertions(+), 227 deletions(-) create mode 100644 changelog.d/13844.misc create mode 100644 synapse/storage/schema/main/delta/73/10login_tokens.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/13844.misc b/changelog.d/13844.misc new file mode 100644 index 0000000000..66f4414df7 --- /dev/null +++ b/changelog.d/13844.misc @@ -0,0 +1 @@ +Save login tokens in database and prevent login token reuse. diff --git a/docs/upgrade.md b/docs/upgrade.md index b81385b191..78c34d0c15 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,15 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.71.0 + +## Removal of the `generate_short_term_login_token` module API method + +As announced with the release of [Synapse 1.69.0](#deprecation-of-the-generate_short_term_login_token-module-api-method), the deprecated `generate_short_term_login_token` module method has been removed. + +Modules relying on it can instead use the `create_login_token` method. + + # Upgrading to v1.69.0 ## Changes to the receipts replication streams diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f5f0e0e7a7..8b9ef25d29 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -38,6 +38,7 @@ from typing import ( import attr import bcrypt import unpaddedbase64 +from prometheus_client import Counter from twisted.internet.defer import CancelledError from twisted.web.server import Request @@ -48,6 +49,7 @@ from synapse.api.errors import ( Codes, InteractiveAuthIncompleteError, LoginError, + NotFoundError, StoreError, SynapseError, UserDeactivatedError, @@ -63,10 +65,14 @@ from synapse.http.server import finish_request, respond_with_html from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.databases.main.registration import ( + LoginTokenExpired, + LoginTokenLookupResult, + LoginTokenReused, +) from synapse.types import JsonDict, Requester, UserID from synapse.util import stringutils as stringutils from synapse.util.async_helpers import delay_cancellation, maybe_awaitable -from synapse.util.macaroons import LoginTokenAttributes from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.stringutils import base62_encode from synapse.util.threepids import canonicalise_email @@ -80,6 +86,12 @@ logger = logging.getLogger(__name__) INVALID_USERNAME_OR_PASSWORD = "Invalid username or password" +invalid_login_token_counter = Counter( + "synapse_user_login_invalid_login_tokens", + "Counts the number of rejected m.login.token on /login", + ["reason"], +) + def convert_client_dict_legacy_fields_to_identifier( submission: JsonDict, @@ -883,6 +895,25 @@ class AuthHandler: return True + async def create_login_token_for_user_id( + self, + user_id: str, + duration_ms: int = (2 * 60 * 1000), + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, + ) -> str: + login_token = self.generate_login_token() + now = self._clock.time_msec() + expiry_ts = now + duration_ms + await self.store.add_login_token_to_user( + user_id=user_id, + token=login_token, + expiry_ts=expiry_ts, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, + ) + return login_token + async def create_refresh_token_for_user_id( self, user_id: str, @@ -1401,6 +1432,18 @@ class AuthHandler: return None return user_id + def generate_login_token(self) -> str: + """Generates an opaque string, for use as an short-term login token""" + + # we use the following format for access tokens: + # syl__ + + random_string = stringutils.random_string(20) + base = f"syl_{random_string}" + + crc = base62_encode(crc32(base.encode("ascii")), minwidth=6) + return f"{base}_{crc}" + def generate_access_token(self, for_user: UserID) -> str: """Generates an opaque string, for use as an access token""" @@ -1427,16 +1470,17 @@ class AuthHandler: crc = base62_encode(crc32(base.encode("ascii")), minwidth=6) return f"{base}_{crc}" - async def validate_short_term_login_token( - self, login_token: str - ) -> LoginTokenAttributes: + async def consume_login_token(self, login_token: str) -> LoginTokenLookupResult: try: - res = self.macaroon_gen.verify_short_term_login_token(login_token) - except Exception: - raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN) + return await self.store.consume_login_token(login_token) + except LoginTokenExpired: + invalid_login_token_counter.labels("expired").inc() + except LoginTokenReused: + invalid_login_token_counter.labels("reused").inc() + except NotFoundError: + invalid_login_token_counter.labels("not found").inc() - await self.auth_blocking.check_auth_blocking(res.user_id) - return res + raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN) async def delete_access_token(self, access_token: str) -> None: """Invalidate a single access token @@ -1711,7 +1755,7 @@ class AuthHandler: ) # Create a login token - login_token = self.macaroon_gen.generate_short_term_login_token( + login_token = await self.create_login_token_for_user_id( registered_user_id, auth_provider_id=auth_provider_id, auth_provider_session_id=auth_provider_session_id, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 6a6ae208d1..30e689d00d 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -771,50 +771,11 @@ class ModuleApi: auth_provider_session_id: The session ID got during login from the SSO IdP, if any. """ - # The deprecated `generate_short_term_login_token` method defaulted to an empty - # string for the `auth_provider_id` because of how the underlying macaroon was - # generated. This will change to a proper NULL-able field when the tokens get - # moved to the database. - return self._hs.get_macaroon_generator().generate_short_term_login_token( + return await self._hs.get_auth_handler().create_login_token_for_user_id( user_id, - auth_provider_id or "", - auth_provider_session_id, duration_in_ms, - ) - - def generate_short_term_login_token( - self, - user_id: str, - duration_in_ms: int = (2 * 60 * 1000), - auth_provider_id: str = "", - auth_provider_session_id: Optional[str] = None, - ) -> str: - """Generate a login token suitable for m.login.token authentication - - Added in Synapse v1.9.0. - - This was deprecated in Synapse v1.69.0 in favor of create_login_token, and will - be removed in Synapse 1.71.0. - - Args: - user_id: gives the ID of the user that the token is for - - duration_in_ms: the time that the token will be valid for - - auth_provider_id: the ID of the SSO IdP that the user used to authenticate - to get this token, if any. This is encoded in the token so that - /login can report stats on number of successful logins by IdP. - """ - logger.warn( - "A module configured on this server uses ModuleApi.generate_short_term_login_token(), " - "which is deprecated in favor of ModuleApi.create_login_token(), and will be removed in " - "Synapse 1.71.0", - ) - return self._hs.get_macaroon_generator().generate_short_term_login_token( - user_id, auth_provider_id, auth_provider_session_id, - duration_in_ms, ) @defer.inlineCallbacks diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index f554586ac3..7774f1967d 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -436,8 +436,7 @@ class LoginRestServlet(RestServlet): The body of the JSON response. """ token = login_submission["token"] - auth_handler = self.auth_handler - res = await auth_handler.validate_short_term_login_token(token) + res = await self.auth_handler.consume_login_token(token) return await self._complete_login( res.user_id, diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py index 277b20fb63..43ea21d5e6 100644 --- a/synapse/rest/client/login_token_request.py +++ b/synapse/rest/client/login_token_request.py @@ -57,7 +57,6 @@ class LoginTokenRequestServlet(RestServlet): self.store = hs.get_datastores().main self.clock = hs.get_clock() self.server_name = hs.config.server.server_name - self.macaroon_gen = hs.get_macaroon_generator() self.auth_handler = hs.get_auth_handler() self.token_timeout = hs.config.experimental.msc3882_token_timeout self.ui_auth = hs.config.experimental.msc3882_ui_auth @@ -76,10 +75,10 @@ class LoginTokenRequestServlet(RestServlet): can_skip_ui_auth=False, # Don't allow skipping of UI auth ) - login_token = self.macaroon_gen.generate_short_term_login_token( + login_token = await self.auth_handler.create_login_token_for_user_id( user_id=requester.user.to_string(), auth_provider_id="org.matrix.msc3882.login_token_request", - duration_in_ms=self.token_timeout, + duration_ms=self.token_timeout, ) return ( diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 2996d6bb4d..0255295317 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -21,7 +21,13 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast import attr from synapse.api.constants import UserTypes -from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError +from synapse.api.errors import ( + Codes, + NotFoundError, + StoreError, + SynapseError, + ThreepidValidationError, +) from synapse.config.homeserver import HomeServerConfig from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage.database import ( @@ -50,6 +56,14 @@ class ExternalIDReuseException(Exception): because this external id is given to an other user.""" +class LoginTokenExpired(Exception): + """Exception if the login token sent expired""" + + +class LoginTokenReused(Exception): + """Exception if the login token sent was already used""" + + @attr.s(frozen=True, slots=True, auto_attribs=True) class TokenLookupResult: """Result of looking up an access token. @@ -115,6 +129,20 @@ class RefreshTokenLookupResult: If None, the session can be refreshed indefinitely.""" +@attr.s(auto_attribs=True, frozen=True, slots=True) +class LoginTokenLookupResult: + """Result of looking up a login token.""" + + user_id: str + """The user this token belongs to.""" + + auth_provider_id: Optional[str] + """The SSO Identity Provider that the user authenticated with, to get this token.""" + + auth_provider_session_id: Optional[str] + """The session ID advertised by the SSO Identity Provider.""" + + class RegistrationWorkerStore(CacheInvalidationWorkerStore): def __init__( self, @@ -1789,6 +1817,109 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): "replace_refresh_token", _replace_refresh_token_txn ) + async def add_login_token_to_user( + self, + user_id: str, + token: str, + expiry_ts: int, + auth_provider_id: Optional[str], + auth_provider_session_id: Optional[str], + ) -> None: + """Adds a short-term login token for the given user. + + Args: + user_id: The user ID. + token: The new login token to add. + expiry_ts (milliseconds since the epoch): Time after which the login token + cannot be used. + auth_provider_id: The SSO Identity Provider that the user authenticated with + to get this token, if any + auth_provider_session_id: The session ID advertised by the SSO Identity + Provider, if any. + """ + await self.db_pool.simple_insert( + "login_tokens", + { + "token": token, + "user_id": user_id, + "expiry_ts": expiry_ts, + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, + }, + desc="add_login_token_to_user", + ) + + def _consume_login_token( + self, + txn: LoggingTransaction, + token: str, + ts: int, + ) -> LoginTokenLookupResult: + values = self.db_pool.simple_select_one_txn( + txn, + "login_tokens", + keyvalues={"token": token}, + retcols=( + "user_id", + "expiry_ts", + "used_ts", + "auth_provider_id", + "auth_provider_session_id", + ), + allow_none=True, + ) + + if values is None: + raise NotFoundError() + + self.db_pool.simple_update_one_txn( + txn, + "login_tokens", + keyvalues={"token": token}, + updatevalues={"used_ts": ts}, + ) + user_id = values["user_id"] + expiry_ts = values["expiry_ts"] + used_ts = values["used_ts"] + auth_provider_id = values["auth_provider_id"] + auth_provider_session_id = values["auth_provider_session_id"] + + # Token was already used + if used_ts is not None: + raise LoginTokenReused() + + # Token expired + if ts > int(expiry_ts): + raise LoginTokenExpired() + + return LoginTokenLookupResult( + user_id=user_id, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, + ) + + async def consume_login_token(self, token: str) -> LoginTokenLookupResult: + """Lookup a login token and consume it. + + Args: + token: The login token. + + Returns: + The data stored with that token, including the `user_id`. Returns `None` if + the token does not exist or if it expired. + + Raises: + NotFound if the login token was not found in database + LoginTokenExpired if the login token expired + LoginTokenReused if the login token was already used + """ + return await self.db_pool.runInteraction( + "consume_login_token", + self._consume_login_token, + token, + self._clock.time_msec(), + ) + @cached() async def is_guest(self, user_id: str) -> bool: res = await self.db_pool.simple_select_one_onecol( @@ -2019,6 +2150,12 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): and hs.config.experimental.msc3866.require_approval_for_new_accounts ) + # Create a background job for removing expired login tokens + if hs.config.worker.run_background_tasks: + self._clock.looping_call( + self._delete_expired_login_tokens, THIRTY_MINUTES_IN_MS + ) + async def add_access_token_to_user( self, user_id: str, @@ -2617,6 +2754,23 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): approved, ) + @wrap_as_background_process("delete_expired_login_tokens") + async def _delete_expired_login_tokens(self) -> None: + """Remove login tokens with expiry dates that have passed.""" + + def _delete_expired_login_tokens_txn(txn: LoggingTransaction, ts: int) -> None: + sql = "DELETE FROM login_tokens WHERE expiry_ts <= ?" + txn.execute(sql, (ts,)) + + # We keep the expired tokens for an extra 5 minutes so we can measure how many + # times a token is being used after its expiry + now = self._clock.time_msec() + await self.db_pool.runInteraction( + "delete_expired_login_tokens", + _delete_expired_login_tokens_txn, + now - (5 * 60 * 1000), + ) + def find_max_generated_user_id_localpart(cur: Cursor) -> int: """ diff --git a/synapse/storage/schema/main/delta/73/10login_tokens.sql b/synapse/storage/schema/main/delta/73/10login_tokens.sql new file mode 100644 index 0000000000..a39b7bcece --- /dev/null +++ b/synapse/storage/schema/main/delta/73/10login_tokens.sql @@ -0,0 +1,35 @@ +/* + * Copyright 2022 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Login tokens are short-lived tokens that are used for the m.login.token +-- login method, mainly during SSO logins +CREATE TABLE login_tokens ( + token TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + expiry_ts BIGINT NOT NULL, + used_ts BIGINT, + auth_provider_id TEXT, + auth_provider_session_id TEXT +); + +-- We're sometimes querying them by their session ID we got from their IDP +CREATE INDEX login_tokens_auth_provider_idx + ON login_tokens (auth_provider_id, auth_provider_session_id); + +-- We're deleting them by their expiration time +CREATE INDEX login_tokens_expiry_time_idx + ON login_tokens (expiry_ts); + diff --git a/synapse/util/macaroons.py b/synapse/util/macaroons.py index df77edcce2..5df03d3ddc 100644 --- a/synapse/util/macaroons.py +++ b/synapse/util/macaroons.py @@ -24,7 +24,7 @@ from typing_extensions import Literal from synapse.util import Clock, stringutils -MacaroonType = Literal["access", "delete_pusher", "session", "login"] +MacaroonType = Literal["access", "delete_pusher", "session"] def get_value_from_macaroon(macaroon: pymacaroons.Macaroon, key: str) -> str: @@ -111,19 +111,6 @@ class OidcSessionData: """The session ID of the ongoing UI Auth ("" if this is a login)""" -@attr.s(slots=True, frozen=True, auto_attribs=True) -class LoginTokenAttributes: - """Data we store in a short-term login token""" - - user_id: str - - auth_provider_id: str - """The SSO Identity Provider that the user authenticated with, to get this token.""" - - auth_provider_session_id: Optional[str] - """The session ID advertised by the SSO Identity Provider.""" - - class MacaroonGenerator: def __init__(self, clock: Clock, location: str, secret_key: bytes): self._clock = clock @@ -165,35 +152,6 @@ class MacaroonGenerator: macaroon.add_first_party_caveat(f"pushkey = {pushkey}") return macaroon.serialize() - def generate_short_term_login_token( - self, - user_id: str, - auth_provider_id: str, - auth_provider_session_id: Optional[str] = None, - duration_in_ms: int = (2 * 60 * 1000), - ) -> str: - """Generate a short-term login token used during SSO logins - - Args: - user_id: The user for which the token is valid. - auth_provider_id: The SSO IdP the user used. - auth_provider_session_id: The session ID got during login from the SSO IdP. - - Returns: - A signed token valid for using as a ``m.login.token`` token. - """ - now = self._clock.time_msec() - expiry = now + duration_in_ms - macaroon = self._generate_base_macaroon("login") - macaroon.add_first_party_caveat(f"user_id = {user_id}") - macaroon.add_first_party_caveat(f"time < {expiry}") - macaroon.add_first_party_caveat(f"auth_provider_id = {auth_provider_id}") - if auth_provider_session_id is not None: - macaroon.add_first_party_caveat( - f"auth_provider_session_id = {auth_provider_session_id}" - ) - return macaroon.serialize() - def generate_oidc_session_token( self, state: str, @@ -233,49 +191,6 @@ class MacaroonGenerator: return macaroon.serialize() - def verify_short_term_login_token(self, token: str) -> LoginTokenAttributes: - """Verify a short-term-login macaroon - - Checks that the given token is a valid, unexpired short-term-login token - minted by this server. - - Args: - token: The login token to verify. - - Returns: - A set of attributes carried by this token, including the - ``user_id`` and informations about the SSO IDP used during that - login. - - Raises: - MacaroonVerificationFailedException if the verification failed - """ - macaroon = pymacaroons.Macaroon.deserialize(token) - - v = self._base_verifier("login") - v.satisfy_general(lambda c: c.startswith("user_id = ")) - v.satisfy_general(lambda c: c.startswith("auth_provider_id = ")) - v.satisfy_general(lambda c: c.startswith("auth_provider_session_id = ")) - satisfy_expiry(v, self._clock.time_msec) - v.verify(macaroon, self._secret_key) - - user_id = get_value_from_macaroon(macaroon, "user_id") - auth_provider_id = get_value_from_macaroon(macaroon, "auth_provider_id") - - auth_provider_session_id: Optional[str] = None - try: - auth_provider_session_id = get_value_from_macaroon( - macaroon, "auth_provider_session_id" - ) - except MacaroonVerificationFailedException: - pass - - return LoginTokenAttributes( - user_id=user_id, - auth_provider_id=auth_provider_id, - auth_provider_session_id=auth_provider_session_id, - ) - def verify_guest_token(self, token: str) -> str: """Verify a guest access token macaroon diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 7106799d44..036dbbc45b 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional from unittest.mock import Mock import pymacaroons @@ -19,6 +20,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.errors import AuthError, ResourceLimitError from synapse.rest import admin +from synapse.rest.client import login from synapse.server import HomeServer from synapse.util import Clock @@ -29,6 +31,7 @@ from tests.test_utils import make_awaitable class AuthTestCase(unittest.HomeserverTestCase): servlets = [ admin.register_servlets, + login.register_servlets, ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: @@ -46,6 +49,23 @@ class AuthTestCase(unittest.HomeserverTestCase): self.user1 = self.register_user("a_user", "pass") + def token_login(self, token: str) -> Optional[str]: + body = { + "type": "m.login.token", + "token": token, + } + + channel = self.make_request( + "POST", + "/_matrix/client/v3/login", + body, + ) + + if channel.code == 200: + return channel.json_body["user_id"] + + return None + def test_macaroon_caveats(self) -> None: token = self.macaroon_generator.generate_guest_access_token("a_user") macaroon = pymacaroons.Macaroon.deserialize(token) @@ -73,49 +93,62 @@ class AuthTestCase(unittest.HomeserverTestCase): v.satisfy_general(verify_guest) v.verify(macaroon, self.hs.config.key.macaroon_secret_key) - def test_short_term_login_token_gives_user_id(self) -> None: - token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", duration_in_ms=5000 + def test_login_token_gives_user_id(self) -> None: + token = self.get_success( + self.auth_handler.create_login_token_for_user_id( + self.user1, + duration_ms=(5 * 1000), + ) ) - res = self.get_success(self.auth_handler.validate_short_term_login_token(token)) + + res = self.get_success(self.auth_handler.consume_login_token(token)) self.assertEqual(self.user1, res.user_id) - self.assertEqual("", res.auth_provider_id) + self.assertEqual(None, res.auth_provider_id) - # when we advance the clock, the token should be rejected - self.reactor.advance(6) - self.get_failure( - self.auth_handler.validate_short_term_login_token(token), - AuthError, + def test_login_token_reuse_fails(self) -> None: + token = self.get_success( + self.auth_handler.create_login_token_for_user_id( + self.user1, + duration_ms=(5 * 1000), + ) ) - def test_short_term_login_token_gives_auth_provider(self) -> None: - token = self.macaroon_generator.generate_short_term_login_token( - self.user1, auth_provider_id="my_idp" - ) - res = self.get_success(self.auth_handler.validate_short_term_login_token(token)) - self.assertEqual(self.user1, res.user_id) - self.assertEqual("my_idp", res.auth_provider_id) + self.get_success(self.auth_handler.consume_login_token(token)) - def test_short_term_login_token_cannot_replace_user_id(self) -> None: - token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", duration_in_ms=5000 + self.get_failure( + self.auth_handler.consume_login_token(token), + AuthError, ) - macaroon = pymacaroons.Macaroon.deserialize(token) - res = self.get_success( - self.auth_handler.validate_short_term_login_token(macaroon.serialize()) + def test_login_token_expires(self) -> None: + token = self.get_success( + self.auth_handler.create_login_token_for_user_id( + self.user1, + duration_ms=(5 * 1000), + ) ) - self.assertEqual(self.user1, res.user_id) - - # add another "user_id" caveat, which might allow us to override the - # user_id. - macaroon.add_first_party_caveat("user_id = b_user") + # when we advance the clock, the token should be rejected + self.reactor.advance(6) self.get_failure( - self.auth_handler.validate_short_term_login_token(macaroon.serialize()), + self.auth_handler.consume_login_token(token), AuthError, ) + def test_login_token_gives_auth_provider(self) -> None: + token = self.get_success( + self.auth_handler.create_login_token_for_user_id( + self.user1, + auth_provider_id="my_idp", + auth_provider_session_id="11-22-33-44", + duration_ms=(5 * 1000), + ) + ) + res = self.get_success(self.auth_handler.consume_login_token(token)) + self.assertEqual(self.user1, res.user_id) + self.assertEqual("my_idp", res.auth_provider_id) + self.assertEqual("11-22-33-44", res.auth_provider_session_id) + def test_mau_limits_disabled(self) -> None: self.auth_blocking._limit_usage_by_mau = False # Ensure does not throw exception @@ -125,12 +158,12 @@ class AuthTestCase(unittest.HomeserverTestCase): ) ) - self.get_success( - self.auth_handler.validate_short_term_login_token( - self._get_macaroon().serialize() - ) + token = self.get_success( + self.auth_handler.create_login_token_for_user_id(self.user1) ) + self.assertIsNotNone(self.token_login(token)) + def test_mau_limits_exceeded_large(self) -> None: self.auth_blocking._limit_usage_by_mau = True self.hs.get_datastores().main.get_monthly_active_count = Mock( @@ -147,12 +180,10 @@ class AuthTestCase(unittest.HomeserverTestCase): self.hs.get_datastores().main.get_monthly_active_count = Mock( return_value=make_awaitable(self.large_number_of_users) ) - self.get_failure( - self.auth_handler.validate_short_term_login_token( - self._get_macaroon().serialize() - ), - ResourceLimitError, + token = self.get_success( + self.auth_handler.create_login_token_for_user_id(self.user1) ) + self.assertIsNone(self.token_login(token)) def test_mau_limits_parity(self) -> None: # Ensure we're not at the unix epoch. @@ -171,12 +202,10 @@ class AuthTestCase(unittest.HomeserverTestCase): ), ResourceLimitError, ) - self.get_failure( - self.auth_handler.validate_short_term_login_token( - self._get_macaroon().serialize() - ), - ResourceLimitError, + token = self.get_success( + self.auth_handler.create_login_token_for_user_id(self.user1) ) + self.assertIsNone(self.token_login(token)) # If in monthly active cohort self.hs.get_datastores().main.user_last_seen_monthly_active = Mock( @@ -187,11 +216,10 @@ class AuthTestCase(unittest.HomeserverTestCase): self.user1, device_id=None, valid_until_ms=None ) ) - self.get_success( - self.auth_handler.validate_short_term_login_token( - self._get_macaroon().serialize() - ) + token = self.get_success( + self.auth_handler.create_login_token_for_user_id(self.user1) ) + self.assertIsNotNone(self.token_login(token)) def test_mau_limits_not_exceeded(self) -> None: self.auth_blocking._limit_usage_by_mau = True @@ -209,14 +237,7 @@ class AuthTestCase(unittest.HomeserverTestCase): self.hs.get_datastores().main.get_monthly_active_count = Mock( return_value=make_awaitable(self.small_number_of_users) ) - self.get_success( - self.auth_handler.validate_short_term_login_token( - self._get_macaroon().serialize() - ) - ) - - def _get_macaroon(self) -> pymacaroons.Macaroon: - token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", duration_in_ms=5000 + token = self.get_success( + self.auth_handler.create_login_token_for_user_id(self.user1) ) - return pymacaroons.Macaroon.deserialize(token) + self.assertIsNotNone(self.token_login(token)) diff --git a/tests/util/test_macaroons.py b/tests/util/test_macaroons.py index 32125f7bb7..40754a4711 100644 --- a/tests/util/test_macaroons.py +++ b/tests/util/test_macaroons.py @@ -84,34 +84,6 @@ class MacaroonGeneratorTestCase(TestCase): ) self.assertEqual(user_id, "@user:tesths") - def test_short_term_login_token(self): - """Test the generation and verification of short-term login tokens""" - token = self.macaroon_generator.generate_short_term_login_token( - user_id="@user:tesths", - auth_provider_id="oidc", - auth_provider_session_id="sid", - duration_in_ms=2 * 60 * 1000, - ) - - info = self.macaroon_generator.verify_short_term_login_token(token) - self.assertEqual(info.user_id, "@user:tesths") - self.assertEqual(info.auth_provider_id, "oidc") - self.assertEqual(info.auth_provider_session_id, "sid") - - # Raises with another secret key - with self.assertRaises(MacaroonVerificationFailedException): - self.other_macaroon_generator.verify_short_term_login_token(token) - - # Wait a minute - self.reactor.pump([60]) - # Shouldn't raise - self.macaroon_generator.verify_short_term_login_token(token) - # Wait another minute - self.reactor.pump([60]) - # Should raise since it expired - with self.assertRaises(MacaroonVerificationFailedException): - self.macaroon_generator.verify_short_term_login_token(token) - def test_oidc_session_token(self): """Test the generation and verification of OIDC session cookies""" state = "arandomstate" -- cgit 1.5.1 From b2c2b030798d0e74d3bf1afb4726465b53620638 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 10 Nov 2022 19:02:27 +0000 Subject: Fix PostgreSQL sometimes using table scans for `event_search` (#14409) PostgreSQL may underestimate the number of distinct `room_id`s in `event_search`, which can cause it to use table scans for queries for multiple rooms. Fix this by setting `n_distinct` on the column. Resolves #14402. Signed-off-by: Sean Quah --- changelog.d/14409.bugfix | 1 + .../11event_search_room_id_n_distinct.sql.postgres | 33 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 changelog.d/14409.bugfix create mode 100644 synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres (limited to 'synapse/storage/schema') diff --git a/changelog.d/14409.bugfix b/changelog.d/14409.bugfix new file mode 100644 index 0000000000..f720700653 --- /dev/null +++ b/changelog.d/14409.bugfix @@ -0,0 +1 @@ +Fix PostgreSQL sometimes using table scans for queries against the `event_search` table, taking a long time and a large amount of IO. diff --git a/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres b/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres new file mode 100644 index 0000000000..93cdaefca1 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres @@ -0,0 +1,33 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- By default the postgres statistics collector massively underestimates the +-- number of distinct rooms in `event_search`, which can cause postgres to use +-- table scans for queries for multiple rooms. +-- +-- To work around this we can manually tell postgres the number of distinct rooms +-- by setting `n_distinct` (a negative value here is the number of distinct values +-- divided by the number of rows, so -0.01 means on average there are 100 rows per +-- distinct value). We don't need a particularly accurate number here, as a) we just +-- want it to always use index scans and b) our estimate is going to be better than the +-- one made by the statistics collector. + +ALTER TABLE event_search ALTER COLUMN room_id SET (n_distinct = -0.01); + +-- Ideally we'd do an `ANALYZE event_search (room_id)` here so that +-- the above gets picked up immediately, but that can take a bit of time so we +-- rely on the autovacuum eventually getting run and doing that in the +-- background for us. -- cgit 1.5.1 From 9cae44f49e6bf4f6b8a20ab11a65da417bb1565f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:46:52 +0000 Subject: Track unconverted device list outbound pokes using a position instead (#14516) When a local device list change is added to `device_lists_changes_in_room`, the `converted_to_destinations` flag is set to `FALSE` and the `_handle_new_device_update_async` background process is started. This background process looks for unconverted rows in `device_lists_changes_in_room`, copies them to `device_lists_outbound_pokes` and updates the flag. To update the `converted_to_destinations` flag, the database performs a `DELETE` and `INSERT` internally, which fragments the table. To avoid this, track unconverted rows using a `(stream ID, room ID)` position instead of the flag. From now on, the `converted_to_destinations` column indicates rows that need converting to outbound pokes, but does not indicate whether the conversion has already taken place. Closes #14037. Signed-off-by: Sean Quah --- changelog.d/14516.misc | 1 + synapse/handlers/device.py | 30 +++++- synapse/storage/database.py | 13 +-- synapse/storage/databases/main/devices.py | 107 +++++++++++++-------- .../73/12refactor_device_list_outbound_pokes.sql | 53 ++++++++++ tests/storage/test_devices.py | 3 +- 6 files changed, 158 insertions(+), 49 deletions(-) create mode 100644 changelog.d/14516.misc create mode 100644 synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/14516.misc b/changelog.d/14516.misc new file mode 100644 index 0000000000..51666c6ffc --- /dev/null +++ b/changelog.d/14516.misc @@ -0,0 +1 @@ +Refactor conversion of device list changes in room to outbound pokes to track unconverted rows using a `(stream ID, room ID)` position instead of updating the `converted_to_destinations` flag on every row. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c597639a7f..da3ddafeae 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -682,13 +682,33 @@ class DeviceHandler(DeviceWorkerHandler): hosts_already_sent_to: Set[str] = set() try: + stream_id, room_id = await self.store.get_device_change_last_converted_pos() + while True: self._handle_new_device_update_new_data = False - rows = await self.store.get_uncoverted_outbound_room_pokes() + max_stream_id = self.store.get_device_stream_token() + rows = await self.store.get_uncoverted_outbound_room_pokes( + stream_id, room_id + ) if not rows: # If the DB returned nothing then there is nothing left to # do, *unless* a new device list update happened during the # DB query. + + # Advance `(stream_id, room_id)`. + # `max_stream_id` comes from *before* the query for unconverted + # rows, which means that any unconverted rows must have a larger + # stream ID. + if max_stream_id > stream_id: + stream_id, room_id = max_stream_id, "" + await self.store.set_device_change_last_converted_pos( + stream_id, room_id + ) + else: + assert max_stream_id == stream_id + # Avoid moving `room_id` backwards. + pass + if self._handle_new_device_update_new_data: continue else: @@ -718,7 +738,6 @@ class DeviceHandler(DeviceWorkerHandler): user_id=user_id, device_id=device_id, room_id=room_id, - stream_id=stream_id, hosts=hosts, context=opentracing_context, ) @@ -752,6 +771,12 @@ class DeviceHandler(DeviceWorkerHandler): hosts_already_sent_to.update(hosts) current_stream_id = stream_id + # Advance `(stream_id, room_id)`. + _, _, room_id, stream_id, _ = rows[-1] + await self.store.set_device_change_last_converted_pos( + stream_id, room_id + ) + finally: self._handle_new_device_update_is_processing = False @@ -834,7 +859,6 @@ class DeviceHandler(DeviceWorkerHandler): user_id=user_id, device_id=device_id, room_id=room_id, - stream_id=None, hosts=potentially_changed_hosts, context=None, ) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 0dc44b246c..a14b13aec8 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -2075,13 +2075,14 @@ class DatabasePool: retcols: Collection[str], allow_none: bool = False, ) -> Optional[Dict[str, Any]]: - select_sql = "SELECT %s FROM %s WHERE %s" % ( - ", ".join(retcols), - table, - " AND ".join("%s = ?" % (k,) for k in keyvalues), - ) + select_sql = "SELECT %s FROM %s" % (", ".join(retcols), table) + + if keyvalues: + select_sql += " WHERE %s" % (" AND ".join("%s = ?" % k for k in keyvalues),) + txn.execute(select_sql, list(keyvalues.values())) + else: + txn.execute(select_sql) - txn.execute(select_sql, list(keyvalues.values())) row = txn.fetchone() if not row: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 57230df5ae..37629115ab 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -2008,27 +2008,48 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) async def get_uncoverted_outbound_room_pokes( - self, limit: int = 10 + self, start_stream_id: int, start_room_id: str, limit: int = 10 ) -> List[Tuple[str, str, str, int, Optional[Dict[str, str]]]]: """Get device list changes by room that have not yet been handled and written to `device_lists_outbound_pokes`. + Args: + start_stream_id: Together with `start_room_id`, indicates the position after + which to return device list changes. + start_room_id: Together with `start_stream_id`, indicates the position after + which to return device list changes. + limit: The maximum number of device list changes to return. + Returns: - A list of user ID, device ID, room ID, stream ID and optional opentracing context. + A list of user ID, device ID, room ID, stream ID and optional opentracing + context, in order of ascending (stream ID, room ID). """ sql = """ SELECT user_id, device_id, room_id, stream_id, opentracing_context FROM device_lists_changes_in_room - WHERE NOT converted_to_destinations - ORDER BY stream_id + WHERE + (stream_id, room_id) > (?, ?) AND + stream_id <= ? AND + NOT converted_to_destinations + ORDER BY stream_id ASC, room_id ASC LIMIT ? """ def get_uncoverted_outbound_room_pokes_txn( txn: LoggingTransaction, ) -> List[Tuple[str, str, str, int, Optional[Dict[str, str]]]]: - txn.execute(sql, (limit,)) + txn.execute( + sql, + ( + start_stream_id, + start_room_id, + # Avoid returning rows if there may be uncommitted device list + # changes with smaller stream IDs. + self._device_list_id_gen.get_current_token(), + limit, + ), + ) return [ ( @@ -2050,49 +2071,25 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): user_id: str, device_id: str, room_id: str, - stream_id: Optional[int], hosts: Collection[str], context: Optional[Dict[str, str]], ) -> None: """Queue the device update to be sent to the given set of hosts, calculated from the room ID. - - Marks the associated row in `device_lists_changes_in_room` as handled, - if `stream_id` is provided. """ + if not hosts: + return def add_device_list_outbound_pokes_txn( txn: LoggingTransaction, stream_ids: List[int] ) -> None: - if hosts: - self._add_device_outbound_poke_to_stream_txn( - txn, - user_id=user_id, - device_id=device_id, - hosts=hosts, - stream_ids=stream_ids, - context=context, - ) - - if stream_id: - self.db_pool.simple_update_txn( - txn, - table="device_lists_changes_in_room", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - "stream_id": stream_id, - "room_id": room_id, - }, - updatevalues={"converted_to_destinations": True}, - ) - - if not hosts: - # If there are no hosts then we don't try and generate stream IDs. - return await self.db_pool.runInteraction( - "add_device_list_outbound_pokes", - add_device_list_outbound_pokes_txn, - [], + self._add_device_outbound_poke_to_stream_txn( + txn, + user_id=user_id, + device_id=device_id, + hosts=hosts, + stream_ids=stream_ids, + context=context, ) async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids: @@ -2156,3 +2153,37 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): "get_pending_remote_device_list_updates_for_room", get_pending_remote_device_list_updates_for_room_txn, ) + + async def get_device_change_last_converted_pos(self) -> Tuple[int, str]: + """ + Get the position of the last row in `device_list_changes_in_room` that has been + converted to `device_lists_outbound_pokes`. + + Rows with a strictly greater position where `converted_to_destinations` is + `FALSE` have not been converted. + """ + + row = await self.db_pool.simple_select_one( + table="device_lists_changes_converted_stream_position", + keyvalues={}, + retcols=["stream_id", "room_id"], + desc="get_device_change_last_converted_pos", + ) + return row["stream_id"], row["room_id"] + + async def set_device_change_last_converted_pos( + self, + stream_id: int, + room_id: str, + ) -> None: + """ + Set the position of the last row in `device_list_changes_in_room` that has been + converted to `device_lists_outbound_pokes`. + """ + + await self.db_pool.simple_update_one( + table="device_lists_changes_converted_stream_position", + keyvalues={}, + updatevalues={"stream_id": stream_id, "room_id": room_id}, + desc="set_device_change_last_converted_pos", + ) diff --git a/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql b/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql new file mode 100644 index 0000000000..93d7fcb79b --- /dev/null +++ b/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql @@ -0,0 +1,53 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Prior to this schema delta, we tracked the set of unconverted rows in +-- `device_lists_changes_in_room` using the `converted_to_destinations` flag. When rows +-- were converted to `device_lists_outbound_pokes`, the `converted_to_destinations` flag +-- would be set. +-- +-- After this schema delta, the `converted_to_destinations` is still populated like +-- before, but the set of unconverted rows is determined by the `stream_id` in the new +-- `device_lists_changes_converted_stream_position` table. +-- +-- If rolled back, Synapse will re-send all device list changes that happened since the +-- schema delta. + +CREATE TABLE IF NOT EXISTS device_lists_changes_converted_stream_position( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + -- The (stream id, room id) of the last row in `device_lists_changes_in_room` that + -- has been converted to `device_lists_outbound_pokes`. Rows with a strictly larger + -- (stream id, room id) where `converted_to_destinations` is `FALSE` have not been + -- converted. + stream_id BIGINT NOT NULL, + -- `room_id` may be an empty string, which compares less than all valid room IDs. + room_id TEXT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO device_lists_changes_converted_stream_position (stream_id, room_id) VALUES ( + ( + SELECT COALESCE( + -- The last converted stream id is the smallest unconverted stream id minus + -- one. + MIN(stream_id) - 1, + -- If there is no unconverted stream id, the last converted stream id is the + -- largest stream id. + -- Otherwise, pick 1, since stream ids start at 2. + (SELECT COALESCE(MAX(stream_id), 1) FROM device_lists_changes_in_room) + ) FROM device_lists_changes_in_room WHERE NOT converted_to_destinations + ), + '' +); diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index f37505b6cf..8e7db2c4ec 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -28,7 +28,7 @@ class DeviceStoreTestCase(HomeserverTestCase): """ for device_id in device_ids: - stream_id = self.get_success( + self.get_success( self.store.add_device_change_to_streams( user_id, [device_id], ["!some:room"] ) @@ -39,7 +39,6 @@ class DeviceStoreTestCase(HomeserverTestCase): user_id=user_id, device_id=device_id, room_id="!some:room", - stream_id=stream_id, hosts=[host], context={}, ) -- cgit 1.5.1 From f38d7d79c8ec5c389c51327737bd517a27826bd6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Nov 2022 14:09:00 +0000 Subject: Add another index to `device_lists_changes_in_room` (#14534) This helps avoid reading unnecessarily large amounts of data from the table when querying with a set of room IDs. --- changelog.d/14534.misc | 1 + synapse/storage/databases/main/devices.py | 7 +++++++ .../main/delta/73/13add_device_lists_index.sql | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 changelog.d/14534.misc create mode 100644 synapse/storage/schema/main/delta/73/13add_device_lists_index.sql (limited to 'synapse/storage/schema') diff --git a/changelog.d/14534.misc b/changelog.d/14534.misc new file mode 100644 index 0000000000..5fe79042e5 --- /dev/null +++ b/changelog.d/14534.misc @@ -0,0 +1 @@ +Improve DB performance by reducing amount of data that gets read in `device_lists_changes_in_room`. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 37629115ab..05a193f889 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1441,6 +1441,13 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): self._remove_duplicate_outbound_pokes, ) + self.db_pool.updates.register_background_index_update( + "device_lists_changes_in_room_by_room_index", + index_name="device_lists_changes_in_room_by_room_idx", + table="device_lists_changes_in_room", + columns=["room_id", "stream_id"], + ) + async def _drop_device_list_streams_non_unique_indexes( self, progress: JsonDict, batch_size: int ) -> int: diff --git a/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql b/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql new file mode 100644 index 0000000000..3725022a13 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Adds an index on `device_lists_changes_in_room (room_id, stream_id)`, which +-- speeds up `/sync` queries. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7313, 'device_lists_changes_in_room_by_room_index', '{}'); -- cgit 1.5.1