From 7e5d3b06fa8b6ce3676eb1178d7db0e252d48679 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Mon, 6 Nov 2023 15:41:57 -0500
Subject: Collect information for PushRuleEvaluator in parallel. (#16590)

Fetch information needed for push rule evaluation in parallel.
Ideally this would use query pipelining, but this is not available
in psycopg2.

Due to the database thread pool this may result in little to no
parallelization.
---
 synapse/storage/databases/main/push_rule.py | 50 ++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 19 deletions(-)

(limited to 'synapse/storage/databases/main')

diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 22025eca56..37135d431d 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -28,8 +28,11 @@ from typing import (
     cast,
 )
 
+from twisted.internet import defer
+
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
+from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.replication.tcp.streams import PushRulesStream
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -51,7 +54,8 @@ from synapse.storage.util.id_generators import (
 )
 from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules
 from synapse.types import JsonDict
-from synapse.util import json_encoder
+from synapse.util import json_encoder, unwrapFirstError
+from synapse.util.async_helpers import gather_results
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
@@ -249,23 +253,33 @@ class PushRulesWorkerStore(
             user_id: [] for user_id in user_ids
         }
 
-        rows = cast(
-            List[Tuple[str, str, int, int, str, str]],
-            await self.db_pool.simple_select_many_batch(
-                table="push_rules",
-                column="user_name",
-                iterable=user_ids,
-                retcols=(
-                    "user_name",
-                    "rule_id",
-                    "priority_class",
-                    "priority",
-                    "conditions",
-                    "actions",
+        # gatherResults loses all type information.
+        rows, enabled_map_by_user = await make_deferred_yieldable(
+            gather_results(
+                (
+                    cast(
+                        "defer.Deferred[List[Tuple[str, str, int, int, str, str]]]",
+                        run_in_background(
+                            self.db_pool.simple_select_many_batch,
+                            table="push_rules",
+                            column="user_name",
+                            iterable=user_ids,
+                            retcols=(
+                                "user_name",
+                                "rule_id",
+                                "priority_class",
+                                "priority",
+                                "conditions",
+                                "actions",
+                            ),
+                            desc="bulk_get_push_rules",
+                            batch_size=1000,
+                        ),
+                    ),
+                    run_in_background(self.bulk_get_push_rules_enabled, user_ids),
                 ),
-                desc="bulk_get_push_rules",
-                batch_size=1000,
-            ),
+                consumeErrors=True,
+            ).addErrback(unwrapFirstError)
         )
 
         # Sort by highest priority_class, then highest priority.
@@ -276,8 +290,6 @@ class PushRulesWorkerStore(
                 (rule_id, priority_class, conditions, actions)
             )
 
-        enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids)
-
        results: Dict[str, FilteredPushRules] = {}

         for user_id, rules in raw_rules.items():
--
cgit 1.5.1
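The change above swaps two sequential awaits for a single gather, so both database calls are dispatched before either result is waited on. Below is a rough standalone sketch of the same pattern in plain Twisted; `fetch_rules` and `fetch_enabled` are hypothetical stand-ins for the two store methods, and stock `defer.gatherResults` stands in for Synapse's `gather_results`/`run_in_background`/`make_deferred_yieldable` helpers, which additionally keep Synapse's logging contexts intact:

    from twisted.internet import defer, reactor, task


    def fetch_rules() -> "defer.Deferred[list]":
        # Hypothetical stand-in for the push_rules query; fires after 0.1s.
        return task.deferLater(reactor, 0.1, lambda: ["rule row"])


    def fetch_enabled() -> "defer.Deferred[dict]":
        # Hypothetical stand-in for bulk_get_push_rules_enabled; fires after 0.1s.
        return task.deferLater(reactor, 0.1, lambda: {"@user:example.com": {}})


    async def bulk_fetch() -> None:
        # Start both calls before awaiting either, so the two Deferreds are
        # in flight concurrently rather than serialized by sequential awaits.
        rows, enabled_map = await defer.gatherResults(
            [fetch_rules(), fetch_enabled()], consumeErrors=True
        )
        print(rows, enabled_map)
        reactor.stop()


    defer.ensureDeferred(bulk_fetch())
    reactor.run()

With `consumeErrors=True`, a failed gather errbacks with a `defer.FirstError` wrapping the first underlying failure, which is why the commit chains `.addErrback(unwrapFirstError)` onto the gathered Deferred: it re-raises the original exception to the caller.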
From 9738b1c4975b293a1bc25ee27b5527724038baa1 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Tue, 7 Nov 2023 14:00:25 -0500
Subject: Avoid executing no-op queries. (#16583)

If simple_{insert,upsert,update}_many_txn is called without any data
to modify then return instead of executing the query. This matches
the behavior of simple_{select,delete}_many_txn.
---
 changelog.d/16583.misc                    |  1 +
 synapse/storage/database.py               | 32 ++++++++++++++++++++++---------
 synapse/storage/databases/main/devices.py |  2 +-
 synapse/storage/databases/main/events.py  | 12 ++++++------
 synapse/storage/databases/main/room.py    |  2 +-
 synapse/storage/databases/main/search.py  |  4 ++--
 tests/storage/test_base.py                | 25 +++++-------------------
 7 files changed, 39 insertions(+), 39 deletions(-)
 create mode 100644 changelog.d/16583.misc

(limited to 'synapse/storage/databases/main')

diff --git a/changelog.d/16583.misc b/changelog.d/16583.misc
new file mode 100644
index 0000000000..df5b27b112
--- /dev/null
+++ b/changelog.d/16583.misc
@@ -0,0 +1 @@
+Avoid executing no-op queries.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index abc7d8a5d2..792f2e7cdf 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1117,7 +1117,7 @@ class DatabasePool:
         txn: LoggingTransaction,
         table: str,
         keys: Collection[str],
-        values: Iterable[Iterable[Any]],
+        values: Collection[Iterable[Any]],
     ) -> None:
         """Executes an INSERT query on the named table.
 
@@ -1130,6 +1130,9 @@ class DatabasePool:
             keys: list of column names
             values: for each row, a list of values in the same order as `keys`
         """
+        # If there's nothing to insert, then skip executing the query.
+        if not values:
+            return
 
         if isinstance(txn.database_engine, PostgresEngine):
             # We use `execute_values` as it can be a lot faster than `execute_batch`,
@@ -1455,7 +1458,7 @@ class DatabasePool:
         key_names: Collection[str],
         key_values: Collection[Iterable[Any]],
         value_names: Collection[str],
-        value_values: Iterable[Iterable[Any]],
+        value_values: Collection[Iterable[Any]],
     ) -> None:
         """
         Upsert, many times.
@@ -1468,6 +1471,19 @@ class DatabasePool:
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
         """
+        # If there's nothing to upsert, then skip executing the query.
+        if not key_values:
+            return
+
+        # No value columns, therefore make a blank list so that the following
+        # zip() works correctly.
+        if not value_names:
+            value_values = [() for x in range(len(key_values))]
+        elif len(value_values) != len(key_values):
+            raise ValueError(
+                f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number."
+            )
+
         if table not in self._unsafe_to_upsert_tables:
             return self.simple_upsert_many_txn_native_upsert(
                 txn, table, key_names, key_values, value_names, value_values
@@ -1502,10 +1518,6 @@ class DatabasePool:
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
         """
-        # No value columns, therefore make a blank list so that the following
-        # zip() works correctly.
-        if not value_names:
-            value_values = [() for x in range(len(key_values))]
 
         # Lock the table just once, to prevent it being done once per row.
         # Note that, according to Postgres' documentation, once obtained,
@@ -1543,10 +1555,7 @@ class DatabasePool:
         allnames.extend(value_names)
 
         if not value_names:
-            # No value columns, therefore make a blank list so that the
-            # following zip() works correctly.
             latter = "NOTHING"
-            value_values = [() for x in range(len(key_values))]
         else:
             latter = "UPDATE SET " + ", ".join(
                 k + "=EXCLUDED." + k for k in value_names
@@ -1910,6 +1919,7 @@ class DatabasePool:
         Returns:
             The results as a list of tuples.
         """
+        # If there's nothing to select, then skip executing the query.
         if not iterable:
             return []
 
@@ -2044,6 +2054,9 @@ class DatabasePool:
             raise ValueError(
                 f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number."
             )
+        # If there is nothing to update, then skip executing the query.
+        if not key_values:
+            return
 
         # List of tuples of (value values, then key values)
         # (This matches the order needed for the query)
@@ -2278,6 +2291,7 @@ class DatabasePool:
         Returns:
             Number rows deleted
         """
+        # If there's nothing to delete, then skip executing the query.
         if not values:
             return 0
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index b0811a4cf1..04d12a876c 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -703,7 +703,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             key_names=("destination", "user_id"),
             key_values=[(destination, user_id) for user_id, _ in rows],
             value_names=("stream_id",),
-            value_values=((stream_id,) for _, stream_id in rows),
+            value_values=[(stream_id,) for _, stream_id in rows],
         )
 
         # Delete all sent outbound pokes
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 647ba182f6..7c34bde3e5 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1476,7 +1476,7 @@ class PersistEventsStore:
             txn,
             table="event_json",
             keys=("event_id", "room_id", "internal_metadata", "json", "format_version"),
-            values=(
+            values=[
                 (
                     event.event_id,
                     event.room_id,
@@ -1485,7 +1485,7 @@ class PersistEventsStore:
                     event.format_version,
                 )
                 for event, _ in events_and_contexts
-            ),
+            ],
         )
 
         self.db_pool.simple_insert_many_txn(
@@ -1508,7 +1508,7 @@ class PersistEventsStore:
                 "state_key",
                 "rejection_reason",
             ),
-            values=(
+            values=[
                 (
                     self._instance_name,
                     event.internal_metadata.stream_ordering,
@@ -1527,7 +1527,7 @@ class PersistEventsStore:
                     context.rejected,
                 )
                 for event, context in events_and_contexts
-            ),
+            ],
         )
 
         # If we're persisting an unredacted event we go and ensure
@@ -1550,11 +1550,11 @@ class PersistEventsStore:
                 txn,
                 table="state_events",
                 keys=("event_id", "room_id", "type", "state_key"),
-                values=(
+                values=[
                     (event.event_id, event.room_id, event.type, event.state_key)
                     for event, _ in events_and_contexts
                     if event.is_state()
-                ),
+                ],
             )
 
     def _store_rejected_events_txn(
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 6d4b9891e7..afb880532e 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -2268,7 +2268,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             txn,
             table="partial_state_rooms_servers",
             keys=("room_id", "server_name"),
-            values=((room_id, s) for s in servers),
+            values=[(room_id, s) for s in servers],
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
         self._invalidate_cache_and_stream(
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index dbde9130c6..f4bef4c99b 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -106,7 +106,7 @@ class SearchWorkerStore(SQLBaseStore):
                 txn,
                 table="event_search",
                 keys=("event_id", "room_id", "key", "value"),
-                values=(
+                values=[
                     (
                         entry.event_id,
                         entry.room_id,
@@ -114,7 +114,7 @@ class SearchWorkerStore(SQLBaseStore):
                         _clean_value_for_search(entry.value),
                     )
                     for entry in entries
-                ),
+                ],
             )
 
         else:
diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py
index b4c490b568..de4fcfe026 100644
--- a/tests/storage/test_base.py
+++ b/tests/storage/test_base.py
@@ -189,17 +189,9 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
         if USE_POSTGRES_FOR_TESTS:
-            self.mock_execute_values.assert_called_once_with(
-                self.mock_txn,
-                "INSERT INTO tablename (col1, col2) VALUES ?",
-                [],
-                template=None,
-                fetch=False,
-            )
+            self.mock_execute_values.assert_not_called()
         else:
-            self.mock_txn.executemany.assert_called_once_with(
-                "INSERT INTO tablename (col1, col2) VALUES(?, ?)", []
-            )
+            self.mock_txn.executemany.assert_not_called()
 
     @defer.inlineCallbacks
     def test_select_one_1col(self) -> Generator["defer.Deferred[object]", object, None]:
@@ -393,7 +385,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_update_many_no_values(
+    def test_update_many_no_iterable(
         self,
     ) -> Generator["defer.Deferred[object]", object, None]:
         yield defer.ensureDeferred(
@@ -408,16 +400,9 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
         if USE_POSTGRES_FOR_TESTS:
-            self.mock_execute_batch.assert_called_once_with(
-                self.mock_txn,
-                "UPDATE tablename SET col3 = ? WHERE col1 = ? AND col2 = ?",
-                [],
-            )
+            self.mock_execute_batch.assert_not_called()
         else:
-            self.mock_txn.executemany.assert_called_once_with(
-                "UPDATE tablename SET col3 = ? WHERE col1 = ? AND col2 = ?",
-                [],
-            )
+            self.mock_txn.executemany.assert_not_called()
 
     @defer.inlineCallbacks
     def test_delete_one(self) -> Generator["defer.Deferred[object]", object, None]:
--
cgit 1.5.1
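The guards added in this commit share one shape: check for an empty batch and return before any SQL is built or sent to the database. The tightened annotations (`Iterable[Iterable[Any]]` to `Collection[Iterable[Any]]`) and the switch from generator expressions to list comprehensions at the call sites exist for the same reason: an empty generator is still truthy and has no `len()`, so the emptiness and row-count checks need materialized collections. A minimal self-contained sketch of the guard follows, against sqlite3; `insert_many` here is a hypothetical reduction of `simple_insert_many_txn`, not Synapse code:

    import sqlite3
    from typing import Any, Collection, Sequence


    def insert_many(
        conn: sqlite3.Connection,
        table: str,
        keys: Collection[str],
        values: Collection[Sequence[Any]],
    ) -> None:
        # If there's nothing to insert, skip building and executing the query,
        # mirroring the early return added to simple_insert_many_txn.
        if not values:
            return
        # Table and column names are trusted here, as they are in DatabasePool.
        sql = "INSERT INTO %s (%s) VALUES (%s)" % (
            table,
            ", ".join(keys),
            ", ".join("?" for _ in keys),
        )
        conn.executemany(sql, values)


    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE tablename (col1 TEXT, col2 TEXT)")
    insert_many(conn, "tablename", ("col1", "col2"), [])            # no-op: nothing runs
    insert_many(conn, "tablename", ("col1", "col2"), [("a", "b")])  # one real insert
    print(conn.execute("SELECT * FROM tablename").fetchall())       # [('a', 'b')]

Returning early matches the existing behavior of `simple_select_many_txn` and `simple_delete_many_txn`, and, as the updated tests assert, means `execute_values`/`executemany` is never invoked with an empty argument list at all.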