From d2d48cce85556753f8443d72aafe697c477c217b Mon Sep 17 00:00:00 2001
From: Eric Eastwood <eric.eastwood@beta.gouv.fr>
Date: Thu, 16 May 2024 05:36:54 -0500
Subject: Refactor Sync handler to be able to return different sync responses
 (`SyncVersion`) (#17200)

Refactor Sync handler to be able to be able to return different sync
responses (`SyncVersion`). Preparation to be able support sync v2 and a
new Sliding Sync `/sync/e2ee` endpoint which returns a subset of sync
v2.

Split upon request:
https://github.com/element-hq/synapse/pull/17167#discussion_r1601497279

Split from https://github.com/element-hq/synapse/pull/17167 where we
will add `SyncVersion.E2EE_SYNC` and a new type of sync response.
---
 synapse/handlers/sync.py    | 65 ++++++++++++++++++++++++++++++++++++++++-----
 synapse/rest/client/sync.py |  2 ++
 2 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'synapse')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0bef58351c..53fe2a6a53 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -20,6 +20,7 @@
 #
 import itertools
 import logging
+from enum import Enum
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -112,6 +113,23 @@ LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100
 SyncRequestKey = Tuple[Any, ...]
 
 
+class SyncVersion(Enum):
+    """
+    Enum for specifying the version of sync request. This is used to key which type of
+    sync response that we are generating.
+
+    This is different than the `sync_type` you might see used in other code below; which
+    specifies the sub-type sync request (e.g. initial_sync, full_state_sync,
+    incremental_sync) and is really only relevant for the `/sync` v2 endpoint.
+    """
+
+    # These string values are semantically significant because they are used in the the
+    # metrics
+
+    # Traditional `/sync` endpoint
+    SYNC_V2 = "sync_v2"
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class SyncConfig:
     user: UserID
@@ -309,6 +327,7 @@ class SyncHandler:
         self,
         requester: Requester,
         sync_config: SyncConfig,
+        sync_version: SyncVersion,
         since_token: Optional[StreamToken] = None,
         timeout: int = 0,
         full_state: bool = False,
@@ -316,6 +335,17 @@ class SyncHandler:
         """Get the sync for a client if we have new data for it now. Otherwise
         wait for new data to arrive on the server. If the timeout expires, then
         return an empty sync result.
+
+        Args:
+            requester: The user requesting the sync response.
+            sync_config: Config/info necessary to process the sync request.
+            sync_version: Determines what kind of sync response to generate.
+            since_token: The point in the stream to sync from.
+            timeout: How long to wait for new data to arrive before giving up.
+            full_state: Whether to return the full state for each room.
+
+        Returns:
+            When `SyncVersion.SYNC_V2`, returns a full `SyncResult`.
         """
         # If the user is not part of the mau group, then check that limits have
         # not been exceeded (if not part of the group by this point, almost certain
@@ -327,6 +357,7 @@ class SyncHandler:
             sync_config.request_key,
             self._wait_for_sync_for_user,
             sync_config,
+            sync_version,
             since_token,
             timeout,
             full_state,
@@ -338,6 +369,7 @@ class SyncHandler:
     async def _wait_for_sync_for_user(
         self,
         sync_config: SyncConfig,
+        sync_version: SyncVersion,
         since_token: Optional[StreamToken],
         timeout: int,
         full_state: bool,
@@ -363,9 +395,11 @@ class SyncHandler:
         else:
             sync_type = "incremental_sync"
 
+        sync_label = f"{sync_version}:{sync_type}"
+
         context = current_context()
         if context:
-            context.tag = sync_type
+            context.tag = sync_label
 
         # if we have a since token, delete any to-device messages before that token
         # (since we now know that the device has received them)
@@ -384,14 +418,16 @@ class SyncHandler:
             # we are going to return immediately, so don't bother calling
             # notifier.wait_for_events.
             result: SyncResult = await self.current_sync_for_user(
-                sync_config, since_token, full_state=full_state
+                sync_config, sync_version, since_token, full_state=full_state
             )
         else:
             # Otherwise, we wait for something to happen and report it to the user.
             async def current_sync_callback(
                 before_token: StreamToken, after_token: StreamToken
             ) -> SyncResult:
-                return await self.current_sync_for_user(sync_config, since_token)
+                return await self.current_sync_for_user(
+                    sync_config, sync_version, since_token
+                )
 
             result = await self.notifier.wait_for_events(
                 sync_config.user.to_string(),
@@ -416,13 +452,14 @@ class SyncHandler:
                 lazy_loaded = "true"
             else:
                 lazy_loaded = "false"
-            non_empty_sync_counter.labels(sync_type, lazy_loaded).inc()
+            non_empty_sync_counter.labels(sync_label, lazy_loaded).inc()
 
         return result
 
     async def current_sync_for_user(
         self,
         sync_config: SyncConfig,
+        sync_version: SyncVersion,
         since_token: Optional[StreamToken] = None,
         full_state: bool = False,
     ) -> SyncResult:
@@ -431,12 +468,26 @@ class SyncHandler:
         This is a wrapper around `generate_sync_result` which starts an open tracing
         span to track the sync. See `generate_sync_result` for the next part of your
         indoctrination.
+
+        Args:
+            sync_config: Config/info necessary to process the sync request.
+            sync_version: Determines what kind of sync response to generate.
+            since_token: The point in the stream to sync from.p.
+            full_state: Whether to return the full state for each room.
+        Returns:
+            When `SyncVersion.SYNC_V2`, returns a full `SyncResult`.
         """
         with start_active_span("sync.current_sync_for_user"):
             log_kv({"since_token": since_token})
-            sync_result = await self.generate_sync_result(
-                sync_config, since_token, full_state
-            )
+            # Go through the `/sync` v2 path
+            if sync_version == SyncVersion.SYNC_V2:
+                sync_result: SyncResult = await self.generate_sync_result(
+                    sync_config, since_token, full_state
+                )
+            else:
+                raise Exception(
+                    f"Unknown sync_version (this is a Synapse problem): {sync_version}"
+                )
 
             set_tag(SynapseTags.SYNC_RESULT, bool(sync_result))
             return sync_result
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index d19aaf0e22..d0713536e1 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -40,6 +40,7 @@ from synapse.handlers.sync import (
     KnockedSyncResult,
     SyncConfig,
     SyncResult,
+    SyncVersion,
 )
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string
@@ -232,6 +233,7 @@ class SyncRestServlet(RestServlet):
             sync_result = await self.sync_handler.wait_for_sync_for_user(
                 requester,
                 sync_config,
+                SyncVersion.SYNC_V2,
                 since_token=since_token,
                 timeout=timeout,
                 full_state=full_state,
-- 
cgit 1.5.1


From 5e892671a74251109bf9cf4a78bebed9d8085979 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@element.io>
Date: Thu, 16 May 2024 15:04:14 +0100
Subject: Fix bug where push rules would be empty in `/sync` (#17142)

Fixes #16987

Some old accounts seem to have an entry in global account data table for
push rules, which we should ignore
---
 changelog.d/17142.bugfix    |  1 +
 synapse/handlers/sync.py    | 20 ++++++++------------
 tests/handlers/test_sync.py | 29 ++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/17142.bugfix

(limited to 'synapse')

diff --git a/changelog.d/17142.bugfix b/changelog.d/17142.bugfix
new file mode 100644
index 0000000000..09b617aed1
--- /dev/null
+++ b/changelog.d/17142.bugfix
@@ -0,0 +1 @@
+Fix bug where push rules would be empty in `/sync` for some accounts. Introduced in v1.93.0.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 53fe2a6a53..659499af75 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1971,23 +1971,19 @@ class SyncHandler:
             )
 
             if push_rules_changed:
-                global_account_data = {
-                    AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user(
-                        sync_config.user
-                    ),
-                    **global_account_data,
-                }
+                global_account_data = dict(global_account_data)
+                global_account_data[AccountDataTypes.PUSH_RULES] = (
+                    await self._push_rules_handler.push_rules_for_user(sync_config.user)
+                )
         else:
             all_global_account_data = await self.store.get_global_account_data_for_user(
                 user_id
             )
 
-            global_account_data = {
-                AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user(
-                    sync_config.user
-                ),
-                **all_global_account_data,
-            }
+            global_account_data = dict(all_global_account_data)
+            global_account_data[AccountDataTypes.PUSH_RULES] = (
+                await self._push_rules_handler.push_rules_for_user(sync_config.user)
+            )
 
         account_data_for_user = (
             await sync_config.filter_collection.filter_global_account_data(
diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py
index 9c12a11e3a..0299113b95 100644
--- a/tests/handlers/test_sync.py
+++ b/tests/handlers/test_sync.py
@@ -24,7 +24,7 @@ from parameterized import parameterized
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.constants import EventTypes, JoinRules
+from synapse.api.constants import AccountDataTypes, EventTypes, JoinRules
 from synapse.api.errors import Codes, ResourceLimitError
 from synapse.api.filtering import FilterCollection, Filtering
 from synapse.api.room_versions import RoomVersion, RoomVersions
@@ -895,6 +895,33 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
 
         self.assertIn(private_call_event.event_id, priv_event_ids)
 
+    def test_push_rules_with_bad_account_data(self) -> None:
+        """Some old accounts have managed to set a `m.push_rules` account data,
+        which we should ignore in /sync response.
+        """
+
+        user = self.register_user("alice", "password")
+
+        # Insert the bad account data.
+        self.get_success(
+            self.store.add_account_data_for_user(user, AccountDataTypes.PUSH_RULES, {})
+        )
+
+        sync_result: SyncResult = self.get_success(
+            self.sync_handler.wait_for_sync_for_user(
+                create_requester(user), generate_sync_config(user)
+            )
+        )
+
+        for account_dict in sync_result.account_data:
+            if account_dict["type"] == AccountDataTypes.PUSH_RULES:
+                # We should have lots of push rules here, rather than the bad
+                # empty data.
+                self.assertNotEqual(account_dict["content"], {})
+                return
+
+        self.fail("No push rules found")
+
 
 _request_key = 0
 
-- 
cgit 1.5.1


From fd1200344112eb28486ee6f82ee341ada8bb4f06 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@element.io>
Date: Thu, 16 May 2024 16:07:54 +0100
Subject: Revert "Improve perf of sync device lists" (#17207)

Reverts element-hq/synapse#17191
---
 changelog.d/17191.misc                    |  1 -
 synapse/handlers/sync.py                  | 37 ++++++++++++++++++++++++++-----
 synapse/storage/databases/main/devices.py | 17 ++++++++++++--
 3 files changed, 46 insertions(+), 9 deletions(-)
 delete mode 100644 changelog.d/17191.misc

(limited to 'synapse')

diff --git a/changelog.d/17191.misc b/changelog.d/17191.misc
deleted file mode 100644
index bd55eeaa33..0000000000
--- a/changelog.d/17191.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve performance of calculating device lists changes in `/sync`.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 659499af75..2bd1b8de88 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1854,13 +1854,38 @@ class SyncHandler:
 
         # Step 1a, check for changes in devices of users we share a room
         # with
-        users_that_have_changed = (
-            await self._device_handler.get_device_changes_in_shared_rooms(
-                user_id,
-                sync_result_builder.joined_room_ids,
-                from_token=since_token,
-            )
+        #
+        # We do this in two different ways depending on what we have cached.
+        # If we already have a list of all the user that have changed since
+        # the last sync then it's likely more efficient to compare the rooms
+        # they're in with the rooms the syncing user is in.
+        #
+        # If we don't have that info cached then we get all the users that
+        # share a room with our user and check if those users have changed.
+        cache_result = self.store.get_cached_device_list_changes(
+            since_token.device_list_key
         )
+        if cache_result.hit:
+            changed_users = cache_result.entities
+
+            result = await self.store.get_rooms_for_users(changed_users)
+
+            for changed_user_id, entries in result.items():
+                # Check if the changed user shares any rooms with the user,
+                # or if the changed user is the syncing user (as we always
+                # want to include device list updates of their own devices).
+                if user_id == changed_user_id or any(
+                    rid in joined_rooms for rid in entries
+                ):
+                    users_that_have_changed.add(changed_user_id)
+        else:
+            users_that_have_changed = (
+                await self._device_handler.get_device_changes_in_shared_rooms(
+                    user_id,
+                    sync_result_builder.joined_room_ids,
+                    from_token=since_token,
+                )
+            )
 
         # Step 1b, check for newly joined rooms
         for room_id in newly_joined_rooms:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index d98f0593bc..8dbcb3f5a0 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -70,7 +70,10 @@ from synapse.types import (
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
-from synapse.util.caches.stream_change_cache import StreamChangeCache
+from synapse.util.caches.stream_change_cache import (
+    AllEntitiesChangedResult,
+    StreamChangeCache,
+)
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
 from synapse.util.stringutils import shortstr
@@ -829,6 +832,16 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         )
         return {device[0]: db_to_json(device[1]) for device in devices}
 
+    def get_cached_device_list_changes(
+        self,
+        from_key: int,
+    ) -> AllEntitiesChangedResult:
+        """Get set of users whose devices have changed since `from_key`, or None
+        if that information is not in our cache.
+        """
+
+        return self._device_list_stream_cache.get_all_entities_changed(from_key)
+
     @cancellable
     async def get_all_devices_changed(
         self,
@@ -1462,7 +1475,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
 
         sql = """
             SELECT DISTINCT user_id FROM device_lists_changes_in_room
-            WHERE {clause} AND stream_id > ?
+            WHERE {clause} AND stream_id >= ?
         """
 
         def _get_device_list_changes_in_rooms_txn(
-- 
cgit 1.5.1