From c3c6c0e6222cc1bc8ae35a66389dc428d0ddbc92 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 28 Feb 2020 11:15:11 +0000 Subject: Add 'device_lists_outbound_pokes' as extra table. This makes sure we check all the relevant tables to get the current max stream ID. Currently not doing so isn't problematic as the max stream ID in `device_lists_outbound_pokes` is the same as in `device_lists_stream`, however that will change. --- synapse/replication/slave/storage/devices.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'synapse/replication/slave/storage/devices.py') diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 1c77687eea..bf46cc4f8a 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -29,7 +29,13 @@ class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedSto self.hs = hs self._device_list_id_gen = SlavedIdTracker( - db_conn, "device_lists_stream", "stream_id" + db_conn, + "device_lists_stream", + "stream_id", + extra_tables=[ + ("user_signature_stream", "stream_id"), + ("device_lists_outbound_pokes", "stream_id"), + ], ) device_list_max = self._device_list_id_gen.get_current_token() self._device_list_stream_cache = StreamChangeCache( -- cgit 1.5.1 From 9ce4e344a808e15a36a2d9ea03b77ebfc6ac7fe2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 28 Feb 2020 11:24:05 +0000 Subject: Change device list replication to match new semantics. Instead of sending down batches of user ID/host tuples, send down a row per entity (user ID or host). --- synapse/app/generic_worker.py | 2 +- synapse/replication/slave/storage/devices.py | 25 +++++++++++++------------ synapse/replication/tcp/streams/_base.py | 13 +++++++++---- synapse/storage/data_stores/main/devices.py | 15 +++++++++------ 4 files changed, 32 insertions(+), 23 deletions(-) (limited to 'synapse/replication/slave/storage/devices.py') diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index b2c764bfe8..561a6f4b22 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -774,7 +774,7 @@ class FederationSenderHandler(object): # ... as well as device updates and messages elif stream_name == DeviceListsStream.NAME: - hosts = {row.destination for row in rows} + hosts = {row.entity for row in rows if not row.entity.startswith("@")} for host in hosts: self.federation_sender.send_device_messages(host) diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index bf46cc4f8a..01a4f85884 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -61,23 +61,24 @@ class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedSto def process_replication_rows(self, stream_name, token, rows): if stream_name == DeviceListsStream.NAME: self._device_list_id_gen.advance(token) - for row in rows: - self._invalidate_caches_for_devices(token, row.user_id, row.destination) + self._invalidate_caches_for_devices(token, rows) elif stream_name == UserSignatureStream.NAME: + self._device_list_id_gen.advance(token) for row in rows: self._user_signature_stream_cache.entity_has_changed(row.user_id, token) return super(SlavedDeviceStore, self).process_replication_rows( stream_name, token, rows ) - def _invalidate_caches_for_devices(self, token, user_id, destination): - self._device_list_stream_cache.entity_has_changed(user_id, token) - - if destination: - self._device_list_federation_stream_cache.entity_has_changed( - destination, token - ) + def _invalidate_caches_for_devices(self, token, rows): + for row in rows: + if row.entity.startswith("@"): + self._device_list_stream_cache.entity_has_changed(row.entity, token) + self.get_cached_devices_for_user.invalidate((row.entity,)) + self._get_cached_user_device.invalidate_many((row.entity,)) + self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) - self.get_cached_devices_for_user.invalidate((user_id,)) - self._get_cached_user_device.invalidate_many((user_id,)) - self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) + else: + self._device_list_federation_stream_cache.entity_has_changed( + row.entity, token + ) diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 208e8a667b..7a8b6e9df1 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -94,9 +94,13 @@ PublicRoomsStreamRow = namedtuple( "network_id", # str, optional ), ) -DeviceListsStreamRow = namedtuple( - "DeviceListsStreamRow", ("user_id", "destination") # str # str -) + + +@attr.s +class DeviceListsStreamRow: + entity = attr.ib(type=str) + + ToDeviceStreamRow = namedtuple("ToDeviceStreamRow", ("entity",)) # str TagAccountDataStreamRow = namedtuple( "TagAccountDataStreamRow", ("user_id", "room_id", "data") # str # str # dict @@ -363,7 +367,8 @@ class PublicRoomsStream(Stream): class DeviceListsStream(Stream): - """Someone added/changed/removed a device + """Either a user has updated their devices or a remote server needs to be + told about a device update. """ NAME = "device_lists" diff --git a/synapse/storage/data_stores/main/devices.py b/synapse/storage/data_stores/main/devices.py index 3299607910..768afe7a6c 100644 --- a/synapse/storage/data_stores/main/devices.py +++ b/synapse/storage/data_stores/main/devices.py @@ -612,15 +612,18 @@ class DeviceWorkerStore(SQLBaseStore): combined list of changes to devices, and which destinations need to be poked. `destination` may be None if no destinations need to be poked. """ - # We do a group by here as there can be a large number of duplicate - # entries, since we throw away device IDs. + + # This query Does The Right Thing where it'll correctly apply the + # bounds to the inner queries. sql = """ - SELECT MAX(stream_id) AS stream_id, user_id, destination - FROM device_lists_stream - LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) + SELECT stream_id, entity FROM ( + SELECT stream_id, user_id AS entity FROM device_lists_stream + UNION ALL + SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes + ) AS e WHERE ? < stream_id AND stream_id <= ? - GROUP BY user_id, destination """ + return self.db.execute( "get_all_device_list_changes_for_remotes", None, sql, from_key, to_key ) -- cgit 1.5.1 From 6e6476ef07c2d72fbea85603f2eb2a61a6866732 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Mar 2020 10:13:55 +0000 Subject: Comments from review --- synapse/app/generic_worker.py | 3 +++ synapse/replication/slave/storage/devices.py | 3 +++ synapse/storage/data_stores/main/devices.py | 27 +++++++++++++++++++-------- 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'synapse/replication/slave/storage/devices.py') diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index d596852419..cdc078cf11 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -775,6 +775,9 @@ class FederationSenderHandler(object): # ... as well as device updates and messages elif stream_name == DeviceListsStream.NAME: + # The entities are either user IDs (starting with '@') whose devices + # have changed, or remote servers that we need to tell about + # changes. hosts = {row.entity for row in rows if not row.entity.startswith("@")} for host in hosts: self.federation_sender.send_device_messages(host) diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 01a4f85884..23b1650e41 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -72,6 +72,9 @@ class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedSto def _invalidate_caches_for_devices(self, token, rows): for row in rows: + # The entities are either user IDs (starting with '@') whose devices + # have changed, or remote servers that we need to tell about + # changes. if row.entity.startswith("@"): self._device_list_stream_cache.entity_has_changed(row.entity, token) self.get_cached_devices_for_user.invalidate((row.entity,)) diff --git a/synapse/storage/data_stores/main/devices.py b/synapse/storage/data_stores/main/devices.py index 06e1d9f033..4c19c02bbc 100644 --- a/synapse/storage/data_stores/main/devices.py +++ b/synapse/storage/data_stores/main/devices.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from typing import List, Tuple from six import iteritems @@ -31,7 +32,7 @@ from synapse.logging.opentracing import ( ) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import Database +from synapse.storage.database import Database, LoggingTransaction from synapse.types import Collection, get_verify_key_from_cross_signing_key from synapse.util.caches.descriptors import ( Cache, @@ -574,10 +575,12 @@ class DeviceWorkerStore(SQLBaseStore): else: return set() - def get_all_device_list_changes_for_remotes(self, from_key, to_key): - """Return a list of `(stream_id, user_id, destination)` which is the - combined list of changes to devices, and which destinations need to be - poked. `destination` may be None if no destinations need to be poked. + async def get_all_device_list_changes_for_remotes( + self, from_key: int, to_key: int + ) -> List[Tuple[int, str]]: + """Return a list of `(stream_id, entity)` which is the combined list of + changes to devices and which destinations need to be poked. Entity is + either a user ID (starting with '@') or a remote destination. """ # This query Does The Right Thing where it'll correctly apply the @@ -591,7 +594,7 @@ class DeviceWorkerStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? """ - return self.db.execute( + return await self.db.execute( "get_all_device_list_changes_for_remotes", None, sql, from_key, to_key ) @@ -1018,11 +1021,19 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): return stream_ids[-1] - def _add_device_change_to_stream_txn(self, txn, user_id, device_ids, stream_ids): + def _add_device_change_to_stream_txn( + self, + txn: LoggingTransaction, + user_id: str, + device_ids: Collection[str], + stream_ids: List[str], + ): txn.call_after( self._device_list_stream_cache.entity_has_changed, user_id, stream_ids[-1], ) + min_stream_id = stream_ids[0] + # Delete older entries in the table, as we really only care about # when the latest change happened. txn.executemany( @@ -1030,7 +1041,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): DELETE FROM device_lists_stream WHERE user_id = ? AND device_id = ? AND stream_id < ? """, - [(user_id, device_id, stream_ids[0]) for device_id in device_ids], + [(user_id, device_id, min_stream_id) for device_id in device_ids], ) self.db.simple_insert_many_txn( -- cgit 1.5.1