From a7bdf98d01d2225a479753a85ba81adf02b16a32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Aug 2020 21:38:57 +0100 Subject: Rename database classes to make some sense (#8033) --- synapse/storage/util/id_generators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage/util/id_generators.py') diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 787cebfbec..e2ddd01290 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -20,7 +20,7 @@ from typing import Dict, Set, Tuple from typing_extensions import Deque -from synapse.storage.database import Database, LoggingTransaction +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.util.sequence import PostgresSequenceGenerator @@ -239,7 +239,7 @@ class MultiWriterIdGenerator: def __init__( self, db_conn, - db: Database, + db: DatabasePool, instance_name: str, table: str, instance_column: str, -- cgit 1.5.1 From 76d21d14a042756b0c8a8f520dfd9ea09cf092c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2020 10:39:31 +0100 Subject: Separate `get_current_token` into two. (#8113) The function is used for two purposes: 1) for subscribers of streams to get a token they can use to get further updates with, and 2) for replication to track position of the writers of the stream. For streams with a single writer the two scenarios produce the same result, however the situation becomes complicated for streams with multiple writers. The current `MultiWriterIdGenerator` does not correctly handle the first case (which is not an issue as its only used for the `caches` stream which nothing subscribes to outside of replication). --- changelog.d/8113.misc | 1 + .../slave/storage/_slaved_id_tracker.py | 8 +++++ synapse/replication/tcp/streams/_base.py | 2 +- synapse/storage/databases/main/cache.py | 4 +-- synapse/storage/util/id_generators.py | 36 ++++++++++++++++------ tests/storage/test_id_generators.py | 16 +++++----- 6 files changed, 47 insertions(+), 20 deletions(-) create mode 100644 changelog.d/8113.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8113.misc b/changelog.d/8113.misc new file mode 100644 index 0000000000..00bec4f8ef --- /dev/null +++ b/changelog.d/8113.misc @@ -0,0 +1 @@ +Separate `get_current_token` into two since there are two different use cases for it. diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py index 9d1d173b2f..d43eaf3a29 100644 --- a/synapse/replication/slave/storage/_slaved_id_tracker.py +++ b/synapse/replication/slave/storage/_slaved_id_tracker.py @@ -33,3 +33,11 @@ class SlavedIdTracker(object): int """ return self._current + + def get_current_token_for_writer(self, instance_name: str) -> int: + """Returns the position of the given writer. + + For streams with single writers this is equivalent to + `get_current_token`. + """ + return self.get_current_token() diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 7a42de3f7d..1e92d52165 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -405,7 +405,7 @@ class CachesStream(Stream): store = hs.get_datastore() super().__init__( hs.get_instance_name(), - store.get_cache_stream_token, + store.get_cache_stream_token_for_writer, store.get_all_updated_caches, ) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 10de446065..1e7637a6f5 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -299,8 +299,8 @@ class CacheInvalidationWorkerStore(SQLBaseStore): }, ) - def get_cache_stream_token(self, instance_name): + def get_cache_stream_token_for_writer(self, instance_name: str) -> int: if self._cache_id_gen: - return self._cache_id_gen.get_current_token(instance_name) + return self._cache_id_gen.get_current_token_for_writer(instance_name) else: return 0 diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index e2ddd01290..8276a755e5 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -158,6 +158,14 @@ class StreamIdGenerator(object): return self._current + def get_current_token_for_writer(self, instance_name: str) -> int: + """Returns the position of the given writer. + + For streams with single writers this is equivalent to + `get_current_token`. + """ + return self.get_current_token() + class ChainedIdGenerator(object): """Used to generate new stream ids where the stream must be kept in sync @@ -216,6 +224,14 @@ class ChainedIdGenerator(object): "Attempted to advance token on source for table %r", self._table ) + def get_current_token_for_writer(self, instance_name: str) -> Tuple[int, int]: + """Returns the position of the given writer. + + For streams with single writers this is equivalent to + `get_current_token`. + """ + return self.get_current_token() + class MultiWriterIdGenerator: """An ID generator that tracks a stream that can have multiple writers. @@ -298,7 +314,7 @@ class MultiWriterIdGenerator: # Assert the fetched ID is actually greater than what we currently # believe the ID to be. If not, then the sequence and table have got # out of sync somehow. - assert self.get_current_token() < next_id + assert self.get_current_token_for_writer(self._instance_name) < next_id with self._lock: self._unfinished_ids.add(next_id) @@ -344,16 +360,18 @@ class MultiWriterIdGenerator: curr = self._current_positions.get(self._instance_name, 0) self._current_positions[self._instance_name] = max(curr, next_id) - def get_current_token(self, instance_name: str = None) -> int: - """Gets the current position of a named writer (defaults to current - instance). - - Returns 0 if we don't have a position for the named writer (likely due - to it being a new writer). + def get_current_token(self) -> int: + """Returns the maximum stream id such that all stream ids less than or + equal to it have been successfully persisted. """ - if instance_name is None: - instance_name = self._instance_name + # Currently we don't support this operation, as it's not obvious how to + # condense the stream positions of multiple writers into a single int. + raise NotImplementedError() + + def get_current_token_for_writer(self, instance_name: str) -> int: + """Returns the position of the given writer. + """ with self._lock: return self._current_positions.get(instance_name, 0) diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index e845410dae..7a05194653 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -88,7 +88,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): id_gen = self._create_id_generator() self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token("master"), 7) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) # Try allocating a new ID gen and check that we only see position # advanced after we leave the context manager. @@ -98,12 +98,12 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(stream_id, 8) self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token("master"), 7) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) self.get_success(_get_next_async()) self.assertEqual(id_gen.get_positions(), {"master": 8}) - self.assertEqual(id_gen.get_current_token("master"), 8) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) def test_multi_instance(self): """Test that reads and writes from multiple processes are handled @@ -116,8 +116,8 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): second_id_gen = self._create_id_generator("second") self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 7}) - self.assertEqual(first_id_gen.get_current_token("first"), 3) - self.assertEqual(first_id_gen.get_current_token("second"), 7) + self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 3) + self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7) # Try allocating a new ID gen and check that we only see position # advanced after we leave the context manager. @@ -166,7 +166,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): id_gen = self._create_id_generator() self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token("master"), 7) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) # Try allocating a new ID gen and check that we only see position # advanced after we leave the context manager. @@ -176,9 +176,9 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(stream_id, 8) self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token("master"), 7) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) self.get_success(self.db_pool.runInteraction("test", _get_next_txn)) self.assertEqual(id_gen.get_positions(), {"master": 8}) - self.assertEqual(id_gen.get_current_token("master"), 8) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) -- cgit 1.5.1 From c9c544cda5748ab106464a8f58031bd60d1aba7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2020 13:41:51 +0100 Subject: Remove `ChainedIdGenerator`. (#8123) It's just a thin wrapper around two ID gens to make `get_current_token` and `get_next` return tuples. This can easily be replaced by calling the appropriate methods on the underlying ID gens directly. --- changelog.d/8123.misc | 1 + synapse/replication/slave/storage/push_rule.py | 10 ++-- synapse/replication/tcp/streams/_base.py | 2 +- synapse/rest/client/v1/push_rule.py | 2 +- synapse/storage/databases/main/push_rule.py | 36 +++++++------- synapse/storage/util/id_generators.py | 68 +------------------------- synapse/streams/events.py | 2 +- 7 files changed, 26 insertions(+), 95 deletions(-) create mode 100644 changelog.d/8123.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8123.misc b/changelog.d/8123.misc new file mode 100644 index 0000000000..7245122896 --- /dev/null +++ b/changelog.d/8123.misc @@ -0,0 +1 @@ +Remove `ChainedIdGenerator`. diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 590187df46..90d90833f9 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import PushRulesStream from synapse.storage.databases.main.push_rule import PushRulesWorkerStore @@ -21,16 +22,13 @@ from .events import SlavedEventStore class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): - def get_push_rules_stream_token(self): - return ( - self._push_rules_stream_id_gen.get_current_token(), - self._stream_id_gen.get_current_token(), - ) - def get_max_push_rules_stream_id(self): return self._push_rules_stream_id_gen.get_current_token() def process_replication_rows(self, stream_name, instance_name, token, rows): + # We assert this for the benefit of mypy + assert isinstance(self._push_rules_stream_id_gen, SlavedIdTracker) + if stream_name == PushRulesStream.NAME: self._push_rules_stream_id_gen.advance(token) for row in rows: diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 1e92d52165..8c3caf30c9 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -352,7 +352,7 @@ class PushRulesStream(Stream): ) def _current_token(self, instance_name: str) -> int: - push_rules_token, _ = self.store.get_push_rules_stream_token() + push_rules_token = self.store.get_max_push_rules_stream_id() return push_rules_token diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py index e2df638cc5..e781a3bcf4 100644 --- a/synapse/rest/client/v1/push_rule.py +++ b/synapse/rest/client/v1/push_rule.py @@ -159,7 +159,7 @@ class PushRuleRestServlet(RestServlet): return 200, {} def notify_user(self, user_id): - stream_id, _ = self.store.get_push_rules_stream_token() + stream_id = self.store.get_max_push_rules_stream_id() self.notifier.on_new_event("push_rules_key", stream_id, users=[user_id]) async def set_rule_attr(self, user_id, spec, val): diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index c2289a9557..a585e54812 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -30,7 +30,7 @@ from synapse.storage.databases.main.pusher import PusherWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException -from synapse.storage.util.id_generators import ChainedIdGenerator +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -82,9 +82,9 @@ class PushRulesWorkerStore( super(PushRulesWorkerStore, self).__init__(database, db_conn, hs) if hs.config.worker.worker_app is None: - self._push_rules_stream_id_gen = ChainedIdGenerator( - self._stream_id_gen, db_conn, "push_rules_stream", "stream_id" - ) # type: Union[ChainedIdGenerator, SlavedIdTracker] + self._push_rules_stream_id_gen = StreamIdGenerator( + db_conn, "push_rules_stream", "stream_id" + ) # type: Union[StreamIdGenerator, SlavedIdTracker] else: self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id" @@ -338,8 +338,9 @@ class PushRuleStore(PushRulesWorkerStore): ) -> None: conditions_json = json_encoder.encode(conditions) actions_json = json_encoder.encode(actions) - with self._push_rules_stream_id_gen.get_next() as ids: - stream_id, event_stream_ordering = ids + with self._push_rules_stream_id_gen.get_next() as stream_id: + event_stream_ordering = self._stream_id_gen.get_current_token() + if before or after: await self.db_pool.runInteraction( "_add_push_rule_relative_txn", @@ -559,8 +560,9 @@ class PushRuleStore(PushRulesWorkerStore): txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE" ) - with self._push_rules_stream_id_gen.get_next() as ids: - stream_id, event_stream_ordering = ids + with self._push_rules_stream_id_gen.get_next() as stream_id: + event_stream_ordering = self._stream_id_gen.get_current_token() + await self.db_pool.runInteraction( "delete_push_rule", delete_push_rule_txn, @@ -569,8 +571,9 @@ class PushRuleStore(PushRulesWorkerStore): ) async def set_push_rule_enabled(self, user_id, rule_id, enabled) -> None: - with self._push_rules_stream_id_gen.get_next() as ids: - stream_id, event_stream_ordering = ids + with self._push_rules_stream_id_gen.get_next() as stream_id: + event_stream_ordering = self._stream_id_gen.get_current_token() + await self.db_pool.runInteraction( "_set_push_rule_enabled_txn", self._set_push_rule_enabled_txn, @@ -643,8 +646,9 @@ class PushRuleStore(PushRulesWorkerStore): data={"actions": actions_json}, ) - with self._push_rules_stream_id_gen.get_next() as ids: - stream_id, event_stream_ordering = ids + with self._push_rules_stream_id_gen.get_next() as stream_id: + event_stream_ordering = self._stream_id_gen.get_current_token() + await self.db_pool.runInteraction( "set_push_rule_actions", set_push_rule_actions_txn, @@ -673,11 +677,5 @@ class PushRuleStore(PushRulesWorkerStore): self.push_rules_stream_cache.entity_has_changed, user_id, stream_id ) - def get_push_rules_stream_token(self): - """Get the position of the push rules stream. - Returns a pair of a stream id for the push_rules stream and the - room stream ordering it corresponds to.""" - return self._push_rules_stream_id_gen.get_current_token() - def get_max_push_rules_stream_id(self): - return self.get_push_rules_stream_token()[0] + return self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 8276a755e5..0bf772d4d1 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -16,7 +16,7 @@ import contextlib import threading from collections import deque -from typing import Dict, Set, Tuple +from typing import Dict, Set from typing_extensions import Deque @@ -167,72 +167,6 @@ class StreamIdGenerator(object): return self.get_current_token() -class ChainedIdGenerator(object): - """Used to generate new stream ids where the stream must be kept in sync - with another stream. It generates pairs of IDs, the first element is an - integer ID for this stream, the second element is the ID for the stream - that this stream needs to be kept in sync with.""" - - def __init__(self, chained_generator, db_conn, table, column): - self.chained_generator = chained_generator - self._table = table - self._lock = threading.Lock() - self._current_max = _load_current_id(db_conn, table, column) - self._unfinished_ids = deque() # type: Deque[Tuple[int, int]] - - def get_next(self): - """ - Usage: - with stream_id_gen.get_next() as (stream_id, chained_id): - # ... persist event ... - """ - with self._lock: - self._current_max += 1 - next_id = self._current_max - chained_id = self.chained_generator.get_current_token() - - self._unfinished_ids.append((next_id, chained_id)) - - @contextlib.contextmanager - def manager(): - try: - yield (next_id, chained_id) - finally: - with self._lock: - self._unfinished_ids.remove((next_id, chained_id)) - - return manager() - - def get_current_token(self): - """Returns the maximum stream id such that all stream ids less than or - equal to it have been successfully persisted. - """ - with self._lock: - if self._unfinished_ids: - stream_id, chained_id = self._unfinished_ids[0] - return stream_id - 1, chained_id - - return self._current_max, self.chained_generator.get_current_token() - - def advance(self, token: int): - """Stub implementation for advancing the token when receiving updates - over replication; raises an exception as this instance should be the - only source of updates. - """ - - raise Exception( - "Attempted to advance token on source for table %r", self._table - ) - - def get_current_token_for_writer(self, instance_name: str) -> Tuple[int, int]: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. - """ - return self.get_current_token() - - class MultiWriterIdGenerator: """An ID generator that tracks a stream that can have multiple writers. diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 393e34b9fb..7ab46f42bf 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -39,7 +39,7 @@ class EventSources(object): self.store = hs.get_datastore() def get_current_token(self) -> StreamToken: - push_rules_key, _ = self.store.get_push_rules_stream_token() + push_rules_key = self.store.get_max_push_rules_stream_id() to_device_key = self.store.get_to_device_stream_token() device_list_key = self.store.get_device_stream_token() groups_key = self.store.get_group_stream_token() -- cgit 1.5.1 From 2231dffee6788836c86e868dd29574970b13dd18 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2020 15:10:08 +0100 Subject: Make StreamIdGen `get_next` and `get_next_mult` async (#8161) This is mainly so that `StreamIdGenerator` and `MultiWriterIdGenerator` will have the same interface, allowing them to be used interchangeably. --- changelog.d/8161.misc | 1 + synapse/storage/databases/main/account_data.py | 4 +-- synapse/storage/databases/main/deviceinbox.py | 4 +-- synapse/storage/databases/main/devices.py | 8 +++-- synapse/storage/databases/main/end_to_end_keys.py | 43 ++++++++++++----------- synapse/storage/databases/main/events.py | 4 +-- synapse/storage/databases/main/group_server.py | 2 +- synapse/storage/databases/main/presence.py | 2 +- synapse/storage/databases/main/push_rule.py | 8 ++--- synapse/storage/databases/main/pusher.py | 4 +-- synapse/storage/databases/main/receipts.py | 3 +- synapse/storage/databases/main/room.py | 6 ++-- synapse/storage/databases/main/tags.py | 4 +-- synapse/storage/util/id_generators.py | 10 +++--- 14 files changed, 54 insertions(+), 49 deletions(-) create mode 100644 changelog.d/8161.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8161.misc b/changelog.d/8161.misc new file mode 100644 index 0000000000..89ff274de3 --- /dev/null +++ b/changelog.d/8161.misc @@ -0,0 +1 @@ +Refactor `StreamIdGenerator` and `MultiWriterIdGenerator` to have the same interface. diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 82aac2bbf3..04042a2c98 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -336,7 +336,7 @@ class AccountDataStore(AccountDataWorkerStore): """ content_json = json_encoder.encode(content) - with self._account_data_id_gen.get_next() as next_id: + with await self._account_data_id_gen.get_next() as next_id: # no need to lock here as room_account_data has a unique constraint # on (user_id, room_id, account_data_type) so simple_upsert will # retry if there is a conflict. @@ -384,7 +384,7 @@ class AccountDataStore(AccountDataWorkerStore): """ content_json = json_encoder.encode(content) - with self._account_data_id_gen.get_next() as next_id: + with await self._account_data_id_gen.get_next() as next_id: # no need to lock here as account_data has a unique constraint on # (user_id, account_data_type) so simple_upsert will retry if # there is a conflict. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 1f6e995c4f..bb85637a95 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -362,7 +362,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) rows.append((destination, stream_id, now_ms, edu_json)) txn.executemany(sql, rows) - with self._device_inbox_id_gen.get_next() as stream_id: + with await self._device_inbox_id_gen.get_next() as stream_id: now_ms = self.clock.time_msec() await self.db_pool.runInteraction( "add_messages_to_device_inbox", add_messages_txn, now_ms, stream_id @@ -411,7 +411,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) txn, stream_id, local_messages_by_user_then_device ) - with self._device_inbox_id_gen.get_next() as stream_id: + with await self._device_inbox_id_gen.get_next() as stream_id: now_ms = self.clock.time_msec() await self.db_pool.runInteraction( "add_messages_from_remote_to_device_inbox", diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 9a786e2929..03b45dbc4d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -380,7 +380,7 @@ class DeviceWorkerStore(SQLBaseStore): THe new stream ID. """ - with self._device_list_id_gen.get_next() as stream_id: + with await self._device_list_id_gen.get_next() as stream_id: await self.db_pool.runInteraction( "add_user_sig_change_to_streams", self._add_user_signature_change_txn, @@ -1146,7 +1146,9 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): if not device_ids: return - with self._device_list_id_gen.get_next_mult(len(device_ids)) as stream_ids: + with await self._device_list_id_gen.get_next_mult( + len(device_ids) + ) as stream_ids: await self.db_pool.runInteraction( "add_device_change_to_stream", self._add_device_change_to_stream_txn, @@ -1159,7 +1161,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): return stream_ids[-1] context = get_active_span_text_map() - with self._device_list_id_gen.get_next_mult( + with await self._device_list_id_gen.get_next_mult( len(hosts) * len(device_ids) ) as stream_ids: await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index f93e0d320d..385868bdab 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -648,7 +648,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn ) - def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key): + def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key, stream_id): """Set a user's cross-signing key. Args: @@ -658,6 +658,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): for a master key, 'self_signing' for a self-signing key, or 'user_signing' for a user-signing key key (dict): the key data + stream_id (int) """ # the 'key' dict will look something like: # { @@ -695,23 +696,22 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): ) # and finally, store the key itself - with self._cross_signing_id_gen.get_next() as stream_id: - self.db_pool.simple_insert_txn( - txn, - "e2e_cross_signing_keys", - values={ - "user_id": user_id, - "keytype": key_type, - "keydata": json_encoder.encode(key), - "stream_id": stream_id, - }, - ) + self.db_pool.simple_insert_txn( + txn, + "e2e_cross_signing_keys", + values={ + "user_id": user_id, + "keytype": key_type, + "keydata": json_encoder.encode(key), + "stream_id": stream_id, + }, + ) self._invalidate_cache_and_stream( txn, self._get_bare_e2e_cross_signing_keys, (user_id,) ) - def set_e2e_cross_signing_key(self, user_id, key_type, key): + async def set_e2e_cross_signing_key(self, user_id, key_type, key): """Set a user's cross-signing key. Args: @@ -719,13 +719,16 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): key_type (str): the type of cross-signing key to set key (dict): the key data """ - return self.db_pool.runInteraction( - "add_e2e_cross_signing_key", - self._set_e2e_cross_signing_key_txn, - user_id, - key_type, - key, - ) + + with await self._cross_signing_id_gen.get_next() as stream_id: + return await self.db_pool.runInteraction( + "add_e2e_cross_signing_key", + self._set_e2e_cross_signing_key_txn, + user_id, + key_type, + key, + stream_id, + ) def store_e2e_cross_signing_signatures(self, user_id, signatures): """Stores cross-signing signatures. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b90e6de2d5..6313b41eef 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -153,11 +153,11 @@ class PersistEventsStore: # Note: Multiple instances of this function cannot be in flight at # the same time for the same room. if backfilled: - stream_ordering_manager = self._backfill_id_gen.get_next_mult( + stream_ordering_manager = await self._backfill_id_gen.get_next_mult( len(events_and_contexts) ) else: - stream_ordering_manager = self._stream_id_gen.get_next_mult( + stream_ordering_manager = await self._stream_id_gen.get_next_mult( len(events_and_contexts) ) diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py index 0e3b8739c6..a488e0924b 100644 --- a/synapse/storage/databases/main/group_server.py +++ b/synapse/storage/databases/main/group_server.py @@ -1182,7 +1182,7 @@ class GroupServerStore(GroupServerWorkerStore): return next_id - with self._group_updates_id_gen.get_next() as next_id: + with await self._group_updates_id_gen.get_next() as next_id: res = await self.db_pool.runInteraction( "register_user_group_membership", _register_user_group_membership_txn, diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 4e3ec02d14..c9f655dfb7 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -23,7 +23,7 @@ from synapse.util.iterutils import batch_iter class PresenceStore(SQLBaseStore): async def update_presence(self, presence_states): - stream_ordering_manager = self._presence_id_gen.get_next_mult( + stream_ordering_manager = await self._presence_id_gen.get_next_mult( len(presence_states) ) diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index a585e54812..2fb5b02d7d 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -338,7 +338,7 @@ class PushRuleStore(PushRulesWorkerStore): ) -> None: conditions_json = json_encoder.encode(conditions) actions_json = json_encoder.encode(actions) - with self._push_rules_stream_id_gen.get_next() as stream_id: + with await self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() if before or after: @@ -560,7 +560,7 @@ class PushRuleStore(PushRulesWorkerStore): txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE" ) - with self._push_rules_stream_id_gen.get_next() as stream_id: + with await self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( @@ -571,7 +571,7 @@ class PushRuleStore(PushRulesWorkerStore): ) async def set_push_rule_enabled(self, user_id, rule_id, enabled) -> None: - with self._push_rules_stream_id_gen.get_next() as stream_id: + with await self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( @@ -646,7 +646,7 @@ class PushRuleStore(PushRulesWorkerStore): data={"actions": actions_json}, ) - with self._push_rules_stream_id_gen.get_next() as stream_id: + with await self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 1126fd0751..c388468273 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -281,7 +281,7 @@ class PusherStore(PusherWorkerStore): last_stream_ordering, profile_tag="", ) -> None: - with self._pushers_id_gen.get_next() as stream_id: + with await self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on # (app_id, pushkey, user_name) so simple_upsert will retry await self.db_pool.simple_upsert( @@ -344,7 +344,7 @@ class PusherStore(PusherWorkerStore): }, ) - with self._pushers_id_gen.get_next() as stream_id: + with await self._pushers_id_gen.get_next() as stream_id: await self.db_pool.runInteraction( "delete_pusher", delete_pusher_txn, stream_id ) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 19ad1c056f..6821476ee0 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -520,8 +520,7 @@ class ReceiptsStore(ReceiptsWorkerStore): "insert_receipt_conv", graph_to_linear ) - stream_id_manager = self._receipts_id_gen.get_next() - with stream_id_manager as stream_id: + with await self._receipts_id_gen.get_next() as stream_id: event_ts = await self.db_pool.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 7d3ac47261..b3772be2b2 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1129,7 +1129,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with self._public_room_id_gen.get_next() as next_id: + with await self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "store_room_txn", store_room_txn, next_id ) @@ -1196,7 +1196,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with self._public_room_id_gen.get_next() as next_id: + with await self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "set_room_is_public", set_room_is_public_txn, next_id ) @@ -1276,7 +1276,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with self._public_room_id_gen.get_next() as next_id: + with await self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "set_room_is_public_appservice", set_room_is_public_appservice_txn, diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index ade7abc927..0c34bbf21a 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -210,7 +210,7 @@ class TagsStore(TagsWorkerStore): ) self._update_revision_txn(txn, user_id, room_id, next_id) - with self._account_data_id_gen.get_next() as next_id: + with await self._account_data_id_gen.get_next() as next_id: await self.db_pool.runInteraction("add_tag", add_tag_txn, next_id) self.get_tags_for_user.invalidate((user_id,)) @@ -232,7 +232,7 @@ class TagsStore(TagsWorkerStore): txn.execute(sql, (user_id, room_id, tag)) self._update_revision_txn(txn, user_id, room_id, next_id) - with self._account_data_id_gen.get_next() as next_id: + with await self._account_data_id_gen.get_next() as next_id: await self.db_pool.runInteraction("remove_tag", remove_tag_txn, next_id) self.get_tags_for_user.invalidate((user_id,)) diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 0bf772d4d1..ddb5c8c60c 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -80,7 +80,7 @@ class StreamIdGenerator(object): upwards, -1 to grow downwards. Usage: - with stream_id_gen.get_next() as stream_id: + with await stream_id_gen.get_next() as stream_id: # ... persist event ... """ @@ -95,10 +95,10 @@ class StreamIdGenerator(object): ) self._unfinished_ids = deque() # type: Deque[int] - def get_next(self): + async def get_next(self): """ Usage: - with stream_id_gen.get_next() as stream_id: + with await stream_id_gen.get_next() as stream_id: # ... persist event ... """ with self._lock: @@ -117,10 +117,10 @@ class StreamIdGenerator(object): return manager() - def get_next_mult(self, n): + async def get_next_mult(self, n): """ Usage: - with stream_id_gen.get_next(n) as stream_ids: + with await stream_id_gen.get_next(n) as stream_ids: # ... persist events ... """ with self._lock: -- cgit 1.5.1 From eba98fb024af4c84901a7ba01940ffb3c50950c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2020 17:32:30 +0100 Subject: Add functions to `MultiWriterIdGen` used by events stream (#8164) --- changelog.d/8164.misc | 1 + synapse/storage/util/id_generators.py | 103 +++++++++++++++++++++++++++++++++- synapse/storage/util/sequence.py | 8 ++- tests/storage/test_id_generators.py | 36 ++++++++++++ 4 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8164.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8164.misc b/changelog.d/8164.misc new file mode 100644 index 0000000000..55bc079cdb --- /dev/null +++ b/changelog.d/8164.misc @@ -0,0 +1 @@ +Add functions to `MultiWriterIdGen` used by events stream. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index ddb5c8c60c..5b07847773 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -14,9 +14,10 @@ # limitations under the License. import contextlib +import heapq import threading from collections import deque -from typing import Dict, Set +from typing import Dict, List, Set from typing_extensions import Deque @@ -210,6 +211,23 @@ class MultiWriterIdGenerator: # should be less than the minimum of this set (if not empty). self._unfinished_ids = set() # type: Set[int] + # We track the max position where we know everything before has been + # persisted. This is done by a) looking at the min across all instances + # and b) noting that if we have seen a run of persisted positions + # without gaps (e.g. 5, 6, 7) then we can skip forward (e.g. to 7). + # + # Note: There is no guarentee that the IDs generated by the sequence + # will be gapless; gaps can form when e.g. a transaction was rolled + # back. This means that sometimes we won't be able to skip forward the + # position even though everything has been persisted. However, since + # gaps should be relatively rare it's still worth doing the book keeping + # that allows us to skip forwards when there are gapless runs of + # positions. + self._persisted_upto_position = ( + min(self._current_positions.values()) if self._current_positions else 0 + ) + self._known_persisted_positions = [] # type: List[int] + self._sequence_gen = PostgresSequenceGenerator(sequence_name) def _load_current_ids( @@ -234,9 +252,12 @@ class MultiWriterIdGenerator: return current_positions - def _load_next_id_txn(self, txn): + def _load_next_id_txn(self, txn) -> int: return self._sequence_gen.get_next_id_txn(txn) + def _load_next_mult_id_txn(self, txn, n: int) -> List[int]: + return self._sequence_gen.get_next_mult_txn(txn, n) + async def get_next(self): """ Usage: @@ -262,6 +283,34 @@ class MultiWriterIdGenerator: return manager() + async def get_next_mult(self, n: int): + """ + Usage: + with await stream_id_gen.get_next_mult(5) as stream_ids: + # ... persist events ... + """ + next_ids = await self._db.runInteraction( + "_load_next_mult_id", self._load_next_mult_id_txn, n + ) + + # Assert the fetched ID is actually greater than any ID we've already + # seen. If not, then the sequence and table have got out of sync + # somehow. + assert max(self.get_positions().values(), default=0) < min(next_ids) + + with self._lock: + self._unfinished_ids.update(next_ids) + + @contextlib.contextmanager + def manager(): + try: + yield next_ids + finally: + for i in next_ids: + self._mark_id_as_finished(i) + + return manager() + def get_next_txn(self, txn: LoggingTransaction): """ Usage: @@ -326,3 +375,53 @@ class MultiWriterIdGenerator: self._current_positions[instance_name] = max( new_id, self._current_positions.get(instance_name, 0) ) + + self._add_persisted_position(new_id) + + def get_persisted_upto_position(self) -> int: + """Get the max position where all previous positions have been + persisted. + + Note: In the worst case scenario this will be equal to the minimum + position across writers. This means that the returned position here can + lag if one writer doesn't write very often. + """ + + with self._lock: + return self._persisted_upto_position + + def _add_persisted_position(self, new_id: int): + """Record that we have persisted a position. + + This is used to keep the `_current_positions` up to date. + """ + + # We require that the lock is locked by caller + assert self._lock.locked() + + heapq.heappush(self._known_persisted_positions, new_id) + + # We move the current min position up if the minimum current positions + # of all instances is higher (since by definition all positions less + # that that have been persisted). + min_curr = min(self._current_positions.values()) + self._persisted_upto_position = max(min_curr, self._persisted_upto_position) + + # We now iterate through the seen positions, discarding those that are + # less than the current min positions, and incrementing the min position + # if its exactly one greater. + # + # This is also where we discard items from `_known_persisted_positions` + # (to ensure the list doesn't infinitely grow). + while self._known_persisted_positions: + if self._known_persisted_positions[0] <= self._persisted_upto_position: + heapq.heappop(self._known_persisted_positions) + elif ( + self._known_persisted_positions[0] == self._persisted_upto_position + 1 + ): + heapq.heappop(self._known_persisted_positions) + self._persisted_upto_position += 1 + else: + # There was a gap in seen positions, so there is nothing more to + # do. + break diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py index 63dfea4220..ffc1894748 100644 --- a/synapse/storage/util/sequence.py +++ b/synapse/storage/util/sequence.py @@ -14,7 +14,7 @@ # limitations under the License. import abc import threading -from typing import Callable, Optional +from typing import Callable, List, Optional from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.storage.types import Cursor @@ -39,6 +39,12 @@ class PostgresSequenceGenerator(SequenceGenerator): txn.execute("SELECT nextval(?)", (self._sequence_name,)) return txn.fetchone()[0] + def get_next_mult_txn(self, txn: Cursor, n: int) -> List[int]: + txn.execute( + "SELECT nextval(?) FROM generate_series(1, ?)", (self._sequence_name, n) + ) + return [i for (i,) in txn] + GetFirstCallbackType = Callable[[Cursor], int] diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 7a05194653..9b9a183e7f 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -182,3 +182,39 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(id_gen.get_positions(), {"master": 8}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) + + def test_get_persisted_upto_position(self): + """Test that `get_persisted_upto_position` correctly tracks updates to + positions. + """ + + self._insert_rows("first", 3) + self._insert_rows("second", 5) + + id_gen = self._create_id_generator("first") + + # Min is 3 and there is a gap between 5, so we expect it to be 3. + self.assertEqual(id_gen.get_persisted_upto_position(), 3) + + # We advance "first" straight to 6. Min is now 5 but there is no gap so + # we expect it to be 6 + id_gen.advance("first", 6) + self.assertEqual(id_gen.get_persisted_upto_position(), 6) + + # No gap, so we expect 7. + id_gen.advance("second", 7) + self.assertEqual(id_gen.get_persisted_upto_position(), 7) + + # We haven't seen 8 yet, so we expect 7 still. + id_gen.advance("second", 9) + self.assertEqual(id_gen.get_persisted_upto_position(), 7) + + # Now that we've seen 7, 8 and 9 we can got straight to 9. + id_gen.advance("first", 8) + self.assertEqual(id_gen.get_persisted_upto_position(), 9) + + # Jump forward with gaps. The minimum is 11, even though we haven't seen + # 10 we know that everything before 11 must be persisted. + id_gen.advance("first", 11) + id_gen.advance("second", 15) + self.assertEqual(id_gen.get_persisted_upto_position(), 11) -- cgit 1.5.1 From 5649b7f3d05f8e550fba8349433eefd09aacce27 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2020 13:20:34 +0100 Subject: Fix missing _add_persisted_position (#8179) This was forgotten in #8164. --- changelog.d/8179.misc | 1 + synapse/storage/util/id_generators.py | 2 ++ tests/storage/test_id_generators.py | 52 +++++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8179.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8179.misc b/changelog.d/8179.misc new file mode 100644 index 0000000000..55bc079cdb --- /dev/null +++ b/changelog.d/8179.misc @@ -0,0 +1 @@ +Add functions to `MultiWriterIdGen` used by events stream. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 5b07847773..b27a4843d0 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -343,6 +343,8 @@ class MultiWriterIdGenerator: curr = self._current_positions.get(self._instance_name, 0) self._current_positions[self._instance_name] = max(curr, next_id) + self._add_persisted_position(next_id) + def get_current_token(self) -> int: """Returns the maximum stream id such that all stream ids less than or equal to it have been successfully persisted. diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 9b9a183e7f..14ce21c786 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -58,6 +58,10 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): return self.get_success(self.db_pool.runWithConnection(_create)) def _insert_rows(self, instance_name: str, number: int): + """Insert N rows as the given instance, inserting with stream IDs pulled + from the postgres sequence. + """ + def _insert(txn): for _ in range(number): txn.execute( @@ -65,7 +69,20 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): (instance_name,), ) - self.get_success(self.db_pool.runInteraction("test_single_instance", _insert)) + self.get_success(self.db_pool.runInteraction("_insert_rows", _insert)) + + def _insert_row_with_id(self, instance_name: str, stream_id: int): + """Insert one row as the given instance with given stream_id, updating + the postgres sequence position to match. + """ + + def _insert(txn): + txn.execute( + "INSERT INTO foobar VALUES (?, ?)", (stream_id, instance_name,), + ) + txn.execute("SELECT setval('foobar_seq', ?)", (stream_id,)) + + self.get_success(self.db_pool.runInteraction("_insert_row_with_id", _insert)) def test_empty(self): """Test an ID generator against an empty database gives sensible @@ -188,11 +205,17 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): positions. """ - self._insert_rows("first", 3) - self._insert_rows("second", 5) + # The following tests are a bit cheeky in that we notify about new + # positions via `advance` without *actually* advancing the postgres + # sequence. + + self._insert_row_with_id("first", 3) + self._insert_row_with_id("second", 5) id_gen = self._create_id_generator("first") + self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5}) + # Min is 3 and there is a gap between 5, so we expect it to be 3. self.assertEqual(id_gen.get_persisted_upto_position(), 3) @@ -218,3 +241,26 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): id_gen.advance("first", 11) id_gen.advance("second", 15) self.assertEqual(id_gen.get_persisted_upto_position(), 11) + + def test_get_persisted_upto_position_get_next(self): + """Test that `get_persisted_upto_position` correctly tracks updates to + positions when `get_next` is called. + """ + + self._insert_row_with_id("first", 3) + self._insert_row_with_id("second", 5) + + id_gen = self._create_id_generator("first") + + self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5}) + + self.assertEqual(id_gen.get_persisted_upto_position(), 3) + with self.get_success(id_gen.get_next()) as stream_id: + self.assertEqual(stream_id, 6) + self.assertEqual(id_gen.get_persisted_upto_position(), 3) + + self.assertEqual(id_gen.get_persisted_upto_position(), 6) + + # We assume that so long as `get_next` does correctly advance the + # `persisted_upto_position` in this case, then it will be correct in the + # other cases that are tested above (since they'll hit the same code). -- cgit 1.5.1 From bbb3c8641ca1214702dbddd88acfe81cc4fc44ae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Sep 2020 13:36:25 +0100 Subject: Make MultiWriterIDGenerator work for streams that use negative stream IDs (#8203) This is so that we can use it for the backfill events stream. --- changelog.d/8203.misc | 1 + synapse/storage/util/id_generators.py | 39 +++++++++---- tests/storage/test_id_generators.py | 105 ++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 11 deletions(-) create mode 100644 changelog.d/8203.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8203.misc b/changelog.d/8203.misc new file mode 100644 index 0000000000..9fe2224aaa --- /dev/null +++ b/changelog.d/8203.misc @@ -0,0 +1 @@ +Make `MultiWriterIDGenerator` work for streams that use negative values. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index b27a4843d0..9f3d23f0a5 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -185,6 +185,8 @@ class MultiWriterIdGenerator: id_column: Column that stores the stream ID. sequence_name: The name of the postgres sequence used to generate new IDs. + positive: Whether the IDs are positive (true) or negative (false). + When using negative IDs we go backwards from -1 to -2, -3, etc. """ def __init__( @@ -196,13 +198,19 @@ class MultiWriterIdGenerator: instance_column: str, id_column: str, sequence_name: str, + positive: bool = True, ): self._db = db self._instance_name = instance_name + self._positive = positive + self._return_factor = 1 if positive else -1 # We lock as some functions may be called from DB threads. self._lock = threading.Lock() + # Note: If we are a negative stream then we still store all the IDs as + # positive to make life easier for us, and simply negate the IDs when we + # return them. self._current_positions = self._load_current_ids( db_conn, table, instance_column, id_column ) @@ -233,13 +241,16 @@ class MultiWriterIdGenerator: def _load_current_ids( self, db_conn, table: str, instance_column: str, id_column: str ) -> Dict[str, int]: + # If positive stream aggregate via MAX. For negative stream use MIN + # *and* negate the result to get a positive number. sql = """ - SELECT %(instance)s, MAX(%(id)s) FROM %(table)s + SELECT %(instance)s, %(agg)s(%(id)s) FROM %(table)s GROUP BY %(instance)s """ % { "instance": instance_column, "id": id_column, "table": table, + "agg": "MAX" if self._positive else "-MIN", } cur = db_conn.cursor() @@ -269,15 +280,16 @@ class MultiWriterIdGenerator: # Assert the fetched ID is actually greater than what we currently # believe the ID to be. If not, then the sequence and table have got # out of sync somehow. - assert self.get_current_token_for_writer(self._instance_name) < next_id - with self._lock: + assert self._current_positions.get(self._instance_name, 0) < next_id + self._unfinished_ids.add(next_id) @contextlib.contextmanager def manager(): try: - yield next_id + # Multiply by the return factor so that the ID has correct sign. + yield self._return_factor * next_id finally: self._mark_id_as_finished(next_id) @@ -296,15 +308,15 @@ class MultiWriterIdGenerator: # Assert the fetched ID is actually greater than any ID we've already # seen. If not, then the sequence and table have got out of sync # somehow. - assert max(self.get_positions().values(), default=0) < min(next_ids) - with self._lock: + assert max(self._current_positions.values(), default=0) < min(next_ids) + self._unfinished_ids.update(next_ids) @contextlib.contextmanager def manager(): try: - yield next_ids + yield [self._return_factor * i for i in next_ids] finally: for i in next_ids: self._mark_id_as_finished(i) @@ -327,7 +339,7 @@ class MultiWriterIdGenerator: txn.call_after(self._mark_id_as_finished, next_id) txn.call_on_exception(self._mark_id_as_finished, next_id) - return next_id + return self._return_factor * next_id def _mark_id_as_finished(self, next_id: int): """The ID has finished being processed so we should advance the @@ -359,20 +371,25 @@ class MultiWriterIdGenerator: """ with self._lock: - return self._current_positions.get(instance_name, 0) + return self._return_factor * self._current_positions.get(instance_name, 0) def get_positions(self) -> Dict[str, int]: """Get a copy of the current positon map. """ with self._lock: - return dict(self._current_positions) + return { + name: self._return_factor * i + for name, i in self._current_positions.items() + } def advance(self, instance_name: str, new_id: int): """Advance the postion of the named writer to the given ID, if greater than existing entry. """ + new_id *= self._return_factor + with self._lock: self._current_positions[instance_name] = max( new_id, self._current_positions.get(instance_name, 0) @@ -390,7 +407,7 @@ class MultiWriterIdGenerator: """ with self._lock: - return self._persisted_upto_position + return self._return_factor * self._persisted_upto_position def _add_persisted_position(self, new_id: int): """Record that we have persisted a position. diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 14ce21c786..f0a8e32f1e 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -264,3 +264,108 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # We assume that so long as `get_next` does correctly advance the # `persisted_upto_position` in this case, then it will be correct in the # other cases that are tested above (since they'll hit the same code). + + +class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): + """Tests MultiWriterIdGenerator that produce *negative* stream IDs. + """ + + if not USE_POSTGRES_FOR_TESTS: + skip = "Requires Postgres" + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.db_pool = self.store.db_pool # type: DatabasePool + + self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db)) + + def _setup_db(self, txn): + txn.execute("CREATE SEQUENCE foobar_seq") + txn.execute( + """ + CREATE TABLE foobar ( + stream_id BIGINT NOT NULL, + instance_name TEXT NOT NULL, + data TEXT + ); + """ + ) + + def _create_id_generator(self, instance_name="master") -> MultiWriterIdGenerator: + def _create(conn): + return MultiWriterIdGenerator( + conn, + self.db_pool, + instance_name=instance_name, + table="foobar", + instance_column="instance_name", + id_column="stream_id", + sequence_name="foobar_seq", + positive=False, + ) + + return self.get_success(self.db_pool.runWithConnection(_create)) + + def _insert_row(self, instance_name: str, stream_id: int): + """Insert one row as the given instance with given stream_id. + """ + + def _insert(txn): + txn.execute( + "INSERT INTO foobar VALUES (?, ?)", (stream_id, instance_name,), + ) + + self.get_success(self.db_pool.runInteraction("_insert_row", _insert)) + + def test_single_instance(self): + """Test that reads and writes from a single process are handled + correctly. + """ + id_gen = self._create_id_generator() + + with self.get_success(id_gen.get_next()) as stream_id: + self._insert_row("master", stream_id) + + self.assertEqual(id_gen.get_positions(), {"master": -1}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), -1) + self.assertEqual(id_gen.get_persisted_upto_position(), -1) + + with self.get_success(id_gen.get_next_mult(3)) as stream_ids: + for stream_id in stream_ids: + self._insert_row("master", stream_id) + + self.assertEqual(id_gen.get_positions(), {"master": -4}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), -4) + self.assertEqual(id_gen.get_persisted_upto_position(), -4) + + # Test loading from DB by creating a second ID gen + second_id_gen = self._create_id_generator() + + self.assertEqual(second_id_gen.get_positions(), {"master": -4}) + self.assertEqual(second_id_gen.get_current_token_for_writer("master"), -4) + self.assertEqual(second_id_gen.get_persisted_upto_position(), -4) + + def test_multiple_instance(self): + """Tests that having multiple instances that get advanced over + federation works corretly. + """ + id_gen_1 = self._create_id_generator("first") + id_gen_2 = self._create_id_generator("second") + + with self.get_success(id_gen_1.get_next()) as stream_id: + self._insert_row("first", stream_id) + id_gen_2.advance("first", stream_id) + + self.assertEqual(id_gen_1.get_positions(), {"first": -1}) + self.assertEqual(id_gen_2.get_positions(), {"first": -1}) + self.assertEqual(id_gen_1.get_persisted_upto_position(), -1) + self.assertEqual(id_gen_2.get_persisted_upto_position(), -1) + + with self.get_success(id_gen_2.get_next()) as stream_id: + self._insert_row("second", stream_id) + id_gen_1.advance("second", stream_id) + + self.assertEqual(id_gen_1.get_positions(), {"first": -1, "second": -2}) + self.assertEqual(id_gen_2.get_positions(), {"first": -1, "second": -2}) + self.assertEqual(id_gen_1.get_persisted_upto_position(), -2) + self.assertEqual(id_gen_2.get_persisted_upto_position(), -2) -- cgit 1.5.1 From 82c1ee1c22a87b9e6e3179947014b0f11c0a1ac3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Sep 2020 15:48:37 +0100 Subject: Add experimental support for sharding event persister. (#8170) This is *not* ready for production yet. Caveats: 1. We should write some tests... 2. The stream token that we use for events can get stalled at the minimum position of all writers. This means that new events may not be processed and e.g. sent down sync streams if a writer isn't writing or is slow. --- changelog.d/8170.feature | 1 + synapse/config/_base.py | 21 ++++++- synapse/config/_base.pyi | 1 + synapse/config/workers.py | 37 ++++++++---- synapse/handlers/federation.py | 44 ++++++++++----- synapse/handlers/message.py | 14 +++-- synapse/handlers/room.py | 14 +++-- synapse/handlers/room_member.py | 7 --- synapse/replication/http/federation.py | 12 +++- synapse/replication/tcp/handler.py | 2 +- synapse/replication/tcp/streams/events.py | 4 +- synapse/storage/databases/__init__.py | 2 +- synapse/storage/databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 4 +- synapse/storage/databases/main/events_worker.py | 66 +++++++++++++++------- .../schema/delta/58/14events_instance_name.sql | 16 ++++++ .../delta/58/14events_instance_name.sql.postgres | 26 +++++++++ synapse/storage/util/id_generators.py | 10 ++-- 18 files changed, 206 insertions(+), 77 deletions(-) create mode 100644 changelog.d/8170.feature create mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql create mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8170.feature b/changelog.d/8170.feature new file mode 100644 index 0000000000..b363e929ea --- /dev/null +++ b/changelog.d/8170.feature @@ -0,0 +1 @@ +Add experimental support for sharding event persister. diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1417487427..73f0717b0d 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -832,11 +832,26 @@ class ShardedWorkerHandlingConfig: def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key. """ - - # If multiple instances are not defined we always return true. + # If multiple instances are not defined we always return true if not self.instances or len(self.instances) == 1: return True + return self.get_instance(key) == instance_name + + def get_instance(self, key: str) -> str: + """Get the instance responsible for handling the given key. + + Note: For things like federation sending the config for which instance + is sending is known only to the sender instance if there is only one. + Therefore `should_handle` should be used where possible. + """ + + if not self.instances: + return "master" + + if len(self.instances) == 1: + return self.instances[0] + # We shard by taking the hash, modulo it by the number of instances and # then checking whether this instance matches the instance at that # index. @@ -846,7 +861,7 @@ class ShardedWorkerHandlingConfig: dest_hash = sha256(key.encode("utf8")).digest() dest_int = int.from_bytes(dest_hash, byteorder="little") remainder = dest_int % (len(self.instances)) - return self.instances[remainder] == instance_name + return self.instances[remainder] __all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index eb911e8f9f..b8faafa9bd 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -142,3 +142,4 @@ class ShardedWorkerHandlingConfig: instances: List[str] def __init__(self, instances: List[str]) -> None: ... def should_handle(self, instance_name: str, key: str) -> bool: ... + def get_instance(self, key: str) -> str: ... diff --git a/synapse/config/workers.py b/synapse/config/workers.py index c784a71508..f23e42cdf9 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -13,12 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Union + import attr from ._base import Config, ConfigError, ShardedWorkerHandlingConfig from .server import ListenerConfig, parse_listener_def +def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: + """Helper for allowing parsing a string or list of strings to a config + option expecting a list of strings. + """ + + if isinstance(obj, str): + return [obj] + return obj + + @attr.s class InstanceLocationConfig: """The host and port to talk to an instance via HTTP replication. @@ -33,11 +45,13 @@ class WriterLocations: """Specifies the instances that write various streams. Attributes: - events: The instance that writes to the event and backfill streams. - events: The instance that writes to the typing stream. + events: The instances that write to the event and backfill streams. + typing: The instance that writes to the typing stream. """ - events = attr.ib(default="master", type=str) + events = attr.ib( + default=["master"], type=List[str], converter=_instance_to_list_converter + ) typing = attr.ib(default="master", type=str) @@ -105,15 +119,18 @@ class WorkerConfig(Config): writers = config.get("stream_writers") or {} self.writers = WriterLocations(**writers) - # Check that the configured writer for events and typing also appears in + # Check that the configured writers for events and typing also appears in # `instance_map`. for stream in ("events", "typing"): - instance = getattr(self.writers, stream) - if instance != "master" and instance not in self.instance_map: - raise ConfigError( - "Instance %r is configured to write %s but does not appear in `instance_map` config." - % (instance, stream) - ) + instances = _instance_to_list_converter(getattr(self.writers, stream)) + for instance in instances: + if instance != "master" and instance not in self.instance_map: + raise ConfigError( + "Instance %r is configured to write %s but does not appear in `instance_map` config." + % (instance, stream) + ) + + self.events_shard_config = ShardedWorkerHandlingConfig(self.writers.events) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """\ diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 16389a0dca..bd8efbb768 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -923,7 +923,8 @@ class FederationHandler(BaseHandler): ) ) - await self._handle_new_events(dest, ev_infos, backfilled=True) + if ev_infos: + await self._handle_new_events(dest, room_id, ev_infos, backfilled=True) # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) @@ -1216,7 +1217,7 @@ class FederationHandler(BaseHandler): event_infos.append(_NewEventInfo(event, None, auth)) await self._handle_new_events( - destination, event_infos, + destination, room_id, event_infos, ) def _sanity_check_event(self, ev): @@ -1363,15 +1364,15 @@ class FederationHandler(BaseHandler): ) max_stream_id = await self._persist_auth_tree( - origin, auth_chain, state, event, room_version_obj + origin, room_id, auth_chain, state, event, room_version_obj ) # We wait here until this instance has seen the events come down # replication (if we're using replication) as the below uses caches. - # - # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.config.worker.writers.events, "events", max_stream_id + self.config.worker.events_shard_config.get_instance(room_id), + "events", + max_stream_id, ) # Check whether this room is the result of an upgrade of a room we already know @@ -1625,7 +1626,7 @@ class FederationHandler(BaseHandler): ) context = await self.state_handler.compute_event_context(event) - await self.persist_events_and_notify([(event, context)]) + await self.persist_events_and_notify(event.room_id, [(event, context)]) return event @@ -1652,7 +1653,9 @@ class FederationHandler(BaseHandler): await self.federation_client.send_leave(host_list, event) context = await self.state_handler.compute_event_context(event) - stream_id = await self.persist_events_and_notify([(event, context)]) + stream_id = await self.persist_events_and_notify( + event.room_id, [(event, context)] + ) return event, stream_id @@ -1900,7 +1903,7 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( - [(event, context)], backfilled=backfilled + event.room_id, [(event, context)], backfilled=backfilled ) except Exception: run_in_background( @@ -1913,6 +1916,7 @@ class FederationHandler(BaseHandler): async def _handle_new_events( self, origin: str, + room_id: str, event_infos: Iterable[_NewEventInfo], backfilled: bool = False, ) -> None: @@ -1944,6 +1948,7 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( + room_id, [ (ev_info.event, context) for ev_info, context in zip(event_infos, contexts) @@ -1954,6 +1959,7 @@ class FederationHandler(BaseHandler): async def _persist_auth_tree( self, origin: str, + room_id: str, auth_events: List[EventBase], state: List[EventBase], event: EventBase, @@ -1968,6 +1974,7 @@ class FederationHandler(BaseHandler): Args: origin: Where the events came from + room_id, auth_events state event @@ -2042,17 +2049,20 @@ class FederationHandler(BaseHandler): events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR await self.persist_events_and_notify( + room_id, [ (e, events_to_context[e.event_id]) for e in itertools.chain(auth_events, state) - ] + ], ) new_event_context = await self.state_handler.compute_event_context( event, old_state=state ) - return await self.persist_events_and_notify([(event, new_event_context)]) + return await self.persist_events_and_notify( + room_id, [(event, new_event_context)] + ) async def _prep_event( self, @@ -2903,6 +2913,7 @@ class FederationHandler(BaseHandler): async def persist_events_and_notify( self, + room_id: str, event_and_contexts: Sequence[Tuple[EventBase, EventContext]], backfilled: bool = False, ) -> int: @@ -2910,14 +2921,19 @@ class FederationHandler(BaseHandler): necessary. Args: - event_and_contexts: + room_id: The room ID of events being persisted. + event_and_contexts: Sequence of events with their associated + context that should be persisted. All events must belong to + the same room. backfilled: Whether these events are a result of backfilling or not """ - if self.config.worker.writers.events != self._instance_name: + instance = self.config.worker.events_shard_config.get_instance(room_id) + if instance != self._instance_name: result = await self._send_events( - instance_name=self.config.worker.writers.events, + instance_name=instance, store=self.store, + room_id=room_id, event_and_contexts=event_and_contexts, backfilled=backfilled, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 72bb638167..6398774c02 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -376,9 +376,8 @@ class EventCreationHandler(object): self.notifier = hs.get_notifier() self.config = hs.config self.require_membership_for_aliases = hs.config.require_membership_for_aliases - self._is_event_writer = ( - self.config.worker.writers.events == hs.get_instance_name() - ) + self._events_shard_config = self.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() self.room_invite_state_types = self.hs.config.room_invite_state_types @@ -904,9 +903,10 @@ class EventCreationHandler(object): try: # If we're a worker we need to hit out to the master. - if not self._is_event_writer: + writer_instance = self._events_shard_config.get_instance(event.room_id) + if writer_instance != self._instance_name: result = await self.send_event( - instance_name=self.config.worker.writers.events, + instance_name=writer_instance, event_id=event.event_id, store=self.store, requester=requester, @@ -974,7 +974,9 @@ class EventCreationHandler(object): This should only be run on the instance in charge of persisting events. """ - assert self._is_event_writer + assert self._events_shard_config.should_handle( + self._instance_name, event.room_id + ) if ratelimit: # We check if this is a room admin redacting an event so that we diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9d5b1828df..55794c3057 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -804,7 +804,9 @@ class RoomCreationHandler(BaseHandler): # Always wait for room creation to progate before returning await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", last_stream_id + self.hs.config.worker.events_shard_config.get_instance(room_id), + "events", + last_stream_id, ) return result, last_stream_id @@ -1260,10 +1262,10 @@ class RoomShutdownHandler(object): # We now wait for the create room to come back in via replication so # that we can assume that all the joins/invites have propogated before # we try and auto join below. - # - # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", stream_id + self.hs.config.worker.events_shard_config.get_instance(new_room_id), + "events", + stream_id, ) else: new_room_id = None @@ -1293,7 +1295,9 @@ class RoomShutdownHandler(object): # Wait for leave to come in over replication before trying to forget. await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", stream_id + self.hs.config.worker.events_shard_config.get_instance(room_id), + "events", + stream_id, ) await self.room_member_handler.forget(target_requester.user, room_id) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a7962b0ada..e5cb3c2d5c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -82,13 +82,6 @@ class RoomMemberHandler(object): self._enable_lookup = hs.config.enable_3pid_lookup self.allow_per_room_profiles = self.config.allow_per_room_profiles - self._event_stream_writer_instance = hs.config.worker.writers.events - self._is_on_event_persistence_instance = ( - self._event_stream_writer_instance == hs.get_instance_name() - ) - if self._is_on_event_persistence_instance: - self.persist_event_storage = hs.get_storage().persistence - self._join_rate_limiter_local = Ratelimiter( clock=self.clock, rate_hz=hs.config.ratelimiting.rc_joins_local.per_second, diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 6b56315148..5c8be747e1 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -65,10 +65,11 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): self.federation_handler = hs.get_handlers().federation_handler @staticmethod - async def _serialize_payload(store, event_and_contexts, backfilled): + async def _serialize_payload(store, room_id, event_and_contexts, backfilled): """ Args: store + room_id (str) event_and_contexts (list[tuple[FrozenEvent, EventContext]]) backfilled (bool): Whether or not the events are the result of backfilling @@ -88,7 +89,11 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): } ) - payload = {"events": event_payloads, "backfilled": backfilled} + payload = { + "events": event_payloads, + "backfilled": backfilled, + "room_id": room_id, + } return payload @@ -96,6 +101,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): with Measure(self.clock, "repl_fed_send_events_parse"): content = parse_json_object_from_request(request) + room_id = content["room_id"] backfilled = content["backfilled"] event_payloads = content["events"] @@ -120,7 +126,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): logger.info("Got %d events from federation", len(event_and_contexts)) max_stream_id = await self.federation_handler.persist_events_and_notify( - event_and_contexts, backfilled + room_id, event_and_contexts, backfilled ) return 200, {"max_stream_id": max_stream_id} diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 1c303f3a46..b323841f73 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -109,7 +109,7 @@ class ReplicationCommandHandler: if isinstance(stream, (EventsStream, BackfillStream)): # Only add EventStream and BackfillStream as a source on the # instance in charge of event persistence. - if hs.config.worker.writers.events == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.events: self._streams_to_replicate.append(stream) continue diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 16c63ff4ec..3705618b4f 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -19,7 +19,7 @@ from typing import List, Tuple, Type import attr -from ._base import Stream, StreamUpdateResult, Token, current_token_without_instance +from ._base import Stream, StreamUpdateResult, Token """Handling of the 'events' replication stream @@ -117,7 +117,7 @@ class EventsStream(Stream): self._store = hs.get_datastore() super().__init__( hs.get_instance_name(), - current_token_without_instance(self._store.get_current_events_token), + self._store._stream_id_gen.get_current_token_for_writer, self._update_function, ) diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py index 0ac854aee2..c73d54fb67 100644 --- a/synapse/storage/databases/__init__.py +++ b/synapse/storage/databases/__init__.py @@ -68,7 +68,7 @@ class Databases(object): # If we're on a process that can persist events also # instantiate a `PersistEventsStore` - if hs.config.worker.writers.events == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.events: persist_events = PersistEventsStore(hs, database, main) if "state" in database_config.databases: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0b69aa6a94..4c3c162acf 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -438,7 +438,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ if stream_ordering <= self.stream_ordering_month_ago: - raise StoreError(400, "stream_ordering too old") + raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,)) sql = """ SELECT event_id FROM stream_ordering_to_exterm diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 6313b41eef..46b11e705b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -97,6 +97,7 @@ class PersistEventsStore: self.store = main_data_store self.database_engine = db.engine self._clock = hs.get_clock() + self._instance_name = hs.get_instance_name() self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id @@ -108,7 +109,7 @@ class PersistEventsStore: # This should only exist on instances that are configured to write assert ( - hs.config.worker.writers.events == hs.get_instance_name() + hs.get_instance_name() in hs.config.worker.writers.events ), "Can only instantiate EventsStore on master" async def _persist_events_and_state_updates( @@ -800,6 +801,7 @@ class PersistEventsStore: table="events", values=[ { + "instance_name": self._instance_name, "stream_ordering": event.internal_metadata.stream_ordering, "topological_ordering": event.depth, "depth": event.depth, diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index a7a73cc3d8..17f5997b89 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -42,7 +42,8 @@ from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool -from synapse.storage.util.id_generators import StreamIdGenerator +from synapse.storage.engines import PostgresEngine +from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.types import Collection, get_domain_from_id from synapse.util.caches.descriptors import Cache, cached from synapse.util.iterutils import batch_iter @@ -78,27 +79,54 @@ class EventsWorkerStore(SQLBaseStore): def __init__(self, database: DatabasePool, db_conn, hs): super(EventsWorkerStore, self).__init__(database, db_conn, hs) - if hs.config.worker.writers.events == hs.get_instance_name(): - # We are the process in charge of generating stream ids for events, - # so instantiate ID generators based on the database - self._stream_id_gen = StreamIdGenerator( - db_conn, "events", "stream_ordering", + if isinstance(database.engine, PostgresEngine): + # If we're using Postgres than we can use `MultiWriterIdGenerator` + # regardless of whether this process writes to the streams or not. + self._stream_id_gen = MultiWriterIdGenerator( + db_conn=db_conn, + db=database, + instance_name=hs.get_instance_name(), + table="events", + instance_column="instance_name", + id_column="stream_ordering", + sequence_name="events_stream_seq", ) - self._backfill_id_gen = StreamIdGenerator( - db_conn, - "events", - "stream_ordering", - step=-1, - extra_tables=[("ex_outlier_stream", "event_stream_ordering")], + self._backfill_id_gen = MultiWriterIdGenerator( + db_conn=db_conn, + db=database, + instance_name=hs.get_instance_name(), + table="events", + instance_column="instance_name", + id_column="stream_ordering", + sequence_name="events_backfill_stream_seq", + positive=False, ) else: - # Another process is in charge of persisting events and generating - # stream IDs: rely on the replication streams to let us know which - # IDs we can process. - self._stream_id_gen = SlavedIdTracker(db_conn, "events", "stream_ordering") - self._backfill_id_gen = SlavedIdTracker( - db_conn, "events", "stream_ordering", step=-1 - ) + # We shouldn't be running in worker mode with SQLite, but its useful + # to support it for unit tests. + # + # If this process is the writer than we need to use + # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets + # updated over replication. (Multiple writers are not supported for + # SQLite). + if hs.get_instance_name() in hs.config.worker.writers.events: + self._stream_id_gen = StreamIdGenerator( + db_conn, "events", "stream_ordering", + ) + self._backfill_id_gen = StreamIdGenerator( + db_conn, + "events", + "stream_ordering", + step=-1, + extra_tables=[("ex_outlier_stream", "event_stream_ordering")], + ) + else: + self._stream_id_gen = SlavedIdTracker( + db_conn, "events", "stream_ordering" + ) + self._backfill_id_gen = SlavedIdTracker( + db_conn, "events", "stream_ordering", step=-1 + ) self._get_event_cache = Cache( "*getEvent*", diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql new file mode 100644 index 0000000000..98ff76d709 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql @@ -0,0 +1,16 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE events ADD COLUMN instance_name TEXT; diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres new file mode 100644 index 0000000000..97c1e6a0c5 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres @@ -0,0 +1,26 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SEQUENCE IF NOT EXISTS events_stream_seq; + +SELECT setval('events_stream_seq', ( + SELECT COALESCE(MAX(stream_ordering), 1) FROM events +)); + +CREATE SEQUENCE IF NOT EXISTS events_backfill_stream_seq; + +SELECT setval('events_backfill_stream_seq', ( + SELECT COALESCE(-MIN(stream_ordering), 1) FROM events +)); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 9f3d23f0a5..8fd21c2bf8 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -231,8 +231,12 @@ class MultiWriterIdGenerator: # gaps should be relatively rare it's still worth doing the book keeping # that allows us to skip forwards when there are gapless runs of # positions. + # + # We start at 1 here as a) the first generated stream ID will be 2, and + # b) other parts of the code assume that stream IDs are strictly greater + # than 0. self._persisted_upto_position = ( - min(self._current_positions.values()) if self._current_positions else 0 + min(self._current_positions.values()) if self._current_positions else 1 ) self._known_persisted_positions = [] # type: List[int] @@ -362,9 +366,7 @@ class MultiWriterIdGenerator: equal to it have been successfully persisted. """ - # Currently we don't support this operation, as it's not obvious how to - # condense the stream positions of multiple writers into a single int. - raise NotImplementedError() + return self.get_persisted_upto_position() def get_current_token_for_writer(self, instance_name: str) -> int: """Returns the position of the given writer. -- cgit 1.5.1 From 9f8abdcc3828e942f09cc62e29dff93dbaa01ec7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Sep 2020 10:19:42 +0100 Subject: Revert "Add experimental support for sharding event persister. (#8170)" (#8242) * Revert "Add experimental support for sharding event persister. (#8170)" This reverts commit 82c1ee1c22a87b9e6e3179947014b0f11c0a1ac3. * Changelog --- changelog.d/8170.feature | 1 - changelog.d/8242.feature | 1 + synapse/config/_base.py | 21 +------ synapse/config/_base.pyi | 1 - synapse/config/workers.py | 37 ++++-------- synapse/handlers/federation.py | 44 +++++---------- synapse/handlers/message.py | 14 ++--- synapse/handlers/room.py | 14 ++--- synapse/handlers/room_member.py | 7 +++ synapse/replication/http/federation.py | 12 +--- synapse/replication/tcp/handler.py | 2 +- synapse/replication/tcp/streams/events.py | 4 +- synapse/storage/databases/__init__.py | 2 +- synapse/storage/databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 4 +- synapse/storage/databases/main/events_worker.py | 66 +++++++--------------- .../schema/delta/58/14events_instance_name.sql | 16 ------ .../delta/58/14events_instance_name.sql.postgres | 26 --------- synapse/storage/util/id_generators.py | 10 ++-- 19 files changed, 78 insertions(+), 206 deletions(-) delete mode 100644 changelog.d/8170.feature create mode 100644 changelog.d/8242.feature delete mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql delete mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8170.feature b/changelog.d/8170.feature deleted file mode 100644 index b363e929ea..0000000000 --- a/changelog.d/8170.feature +++ /dev/null @@ -1 +0,0 @@ -Add experimental support for sharding event persister. diff --git a/changelog.d/8242.feature b/changelog.d/8242.feature new file mode 100644 index 0000000000..f6891e360d --- /dev/null +++ b/changelog.d/8242.feature @@ -0,0 +1 @@ +Back out experimental support for sharding event persister. **PLEASE REMOVE THIS LINE FROM THE FINAL CHANGELOG** diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 73f0717b0d..1417487427 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -832,26 +832,11 @@ class ShardedWorkerHandlingConfig: def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key. """ - # If multiple instances are not defined we always return true + + # If multiple instances are not defined we always return true. if not self.instances or len(self.instances) == 1: return True - return self.get_instance(key) == instance_name - - def get_instance(self, key: str) -> str: - """Get the instance responsible for handling the given key. - - Note: For things like federation sending the config for which instance - is sending is known only to the sender instance if there is only one. - Therefore `should_handle` should be used where possible. - """ - - if not self.instances: - return "master" - - if len(self.instances) == 1: - return self.instances[0] - # We shard by taking the hash, modulo it by the number of instances and # then checking whether this instance matches the instance at that # index. @@ -861,7 +846,7 @@ class ShardedWorkerHandlingConfig: dest_hash = sha256(key.encode("utf8")).digest() dest_int = int.from_bytes(dest_hash, byteorder="little") remainder = dest_int % (len(self.instances)) - return self.instances[remainder] + return self.instances[remainder] == instance_name __all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index b8faafa9bd..eb911e8f9f 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -142,4 +142,3 @@ class ShardedWorkerHandlingConfig: instances: List[str] def __init__(self, instances: List[str]) -> None: ... def should_handle(self, instance_name: str, key: str) -> bool: ... - def get_instance(self, key: str) -> str: ... diff --git a/synapse/config/workers.py b/synapse/config/workers.py index f23e42cdf9..c784a71508 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -13,24 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Union - import attr from ._base import Config, ConfigError, ShardedWorkerHandlingConfig from .server import ListenerConfig, parse_listener_def -def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: - """Helper for allowing parsing a string or list of strings to a config - option expecting a list of strings. - """ - - if isinstance(obj, str): - return [obj] - return obj - - @attr.s class InstanceLocationConfig: """The host and port to talk to an instance via HTTP replication. @@ -45,13 +33,11 @@ class WriterLocations: """Specifies the instances that write various streams. Attributes: - events: The instances that write to the event and backfill streams. - typing: The instance that writes to the typing stream. + events: The instance that writes to the event and backfill streams. + events: The instance that writes to the typing stream. """ - events = attr.ib( - default=["master"], type=List[str], converter=_instance_to_list_converter - ) + events = attr.ib(default="master", type=str) typing = attr.ib(default="master", type=str) @@ -119,18 +105,15 @@ class WorkerConfig(Config): writers = config.get("stream_writers") or {} self.writers = WriterLocations(**writers) - # Check that the configured writers for events and typing also appears in + # Check that the configured writer for events and typing also appears in # `instance_map`. for stream in ("events", "typing"): - instances = _instance_to_list_converter(getattr(self.writers, stream)) - for instance in instances: - if instance != "master" and instance not in self.instance_map: - raise ConfigError( - "Instance %r is configured to write %s but does not appear in `instance_map` config." - % (instance, stream) - ) - - self.events_shard_config = ShardedWorkerHandlingConfig(self.writers.events) + instance = getattr(self.writers, stream) + if instance != "master" and instance not in self.instance_map: + raise ConfigError( + "Instance %r is configured to write %s but does not appear in `instance_map` config." + % (instance, stream) + ) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """\ diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 310c7f7138..43f2986f89 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -923,8 +923,7 @@ class FederationHandler(BaseHandler): ) ) - if ev_infos: - await self._handle_new_events(dest, room_id, ev_infos, backfilled=True) + await self._handle_new_events(dest, ev_infos, backfilled=True) # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) @@ -1217,7 +1216,7 @@ class FederationHandler(BaseHandler): event_infos.append(_NewEventInfo(event, None, auth)) await self._handle_new_events( - destination, room_id, event_infos, + destination, event_infos, ) def _sanity_check_event(self, ev): @@ -1364,15 +1363,15 @@ class FederationHandler(BaseHandler): ) max_stream_id = await self._persist_auth_tree( - origin, room_id, auth_chain, state, event, room_version_obj + origin, auth_chain, state, event, room_version_obj ) # We wait here until this instance has seen the events come down # replication (if we're using replication) as the below uses caches. + # + # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.config.worker.events_shard_config.get_instance(room_id), - "events", - max_stream_id, + self.config.worker.writers.events, "events", max_stream_id ) # Check whether this room is the result of an upgrade of a room we already know @@ -1626,7 +1625,7 @@ class FederationHandler(BaseHandler): ) context = await self.state_handler.compute_event_context(event) - await self.persist_events_and_notify(event.room_id, [(event, context)]) + await self.persist_events_and_notify([(event, context)]) return event @@ -1653,9 +1652,7 @@ class FederationHandler(BaseHandler): await self.federation_client.send_leave(host_list, event) context = await self.state_handler.compute_event_context(event) - stream_id = await self.persist_events_and_notify( - event.room_id, [(event, context)] - ) + stream_id = await self.persist_events_and_notify([(event, context)]) return event, stream_id @@ -1903,7 +1900,7 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( - event.room_id, [(event, context)], backfilled=backfilled + [(event, context)], backfilled=backfilled ) except Exception: run_in_background( @@ -1916,7 +1913,6 @@ class FederationHandler(BaseHandler): async def _handle_new_events( self, origin: str, - room_id: str, event_infos: Iterable[_NewEventInfo], backfilled: bool = False, ) -> None: @@ -1948,7 +1944,6 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( - room_id, [ (ev_info.event, context) for ev_info, context in zip(event_infos, contexts) @@ -1959,7 +1954,6 @@ class FederationHandler(BaseHandler): async def _persist_auth_tree( self, origin: str, - room_id: str, auth_events: List[EventBase], state: List[EventBase], event: EventBase, @@ -1974,7 +1968,6 @@ class FederationHandler(BaseHandler): Args: origin: Where the events came from - room_id, auth_events state event @@ -2049,20 +2042,17 @@ class FederationHandler(BaseHandler): events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR await self.persist_events_and_notify( - room_id, [ (e, events_to_context[e.event_id]) for e in itertools.chain(auth_events, state) - ], + ] ) new_event_context = await self.state_handler.compute_event_context( event, old_state=state ) - return await self.persist_events_and_notify( - room_id, [(event, new_event_context)] - ) + return await self.persist_events_and_notify([(event, new_event_context)]) async def _prep_event( self, @@ -2913,7 +2903,6 @@ class FederationHandler(BaseHandler): async def persist_events_and_notify( self, - room_id: str, event_and_contexts: Sequence[Tuple[EventBase, EventContext]], backfilled: bool = False, ) -> int: @@ -2921,19 +2910,14 @@ class FederationHandler(BaseHandler): necessary. Args: - room_id: The room ID of events being persisted. - event_and_contexts: Sequence of events with their associated - context that should be persisted. All events must belong to - the same room. + event_and_contexts: backfilled: Whether these events are a result of backfilling or not """ - instance = self.config.worker.events_shard_config.get_instance(room_id) - if instance != self._instance_name: + if self.config.worker.writers.events != self._instance_name: result = await self._send_events( - instance_name=instance, + instance_name=self.config.worker.writers.events, store=self.store, - room_id=room_id, event_and_contexts=event_and_contexts, backfilled=backfilled, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6398774c02..72bb638167 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -376,8 +376,9 @@ class EventCreationHandler(object): self.notifier = hs.get_notifier() self.config = hs.config self.require_membership_for_aliases = hs.config.require_membership_for_aliases - self._events_shard_config = self.config.worker.events_shard_config - self._instance_name = hs.get_instance_name() + self._is_event_writer = ( + self.config.worker.writers.events == hs.get_instance_name() + ) self.room_invite_state_types = self.hs.config.room_invite_state_types @@ -903,10 +904,9 @@ class EventCreationHandler(object): try: # If we're a worker we need to hit out to the master. - writer_instance = self._events_shard_config.get_instance(event.room_id) - if writer_instance != self._instance_name: + if not self._is_event_writer: result = await self.send_event( - instance_name=writer_instance, + instance_name=self.config.worker.writers.events, event_id=event.event_id, store=self.store, requester=requester, @@ -974,9 +974,7 @@ class EventCreationHandler(object): This should only be run on the instance in charge of persisting events. """ - assert self._events_shard_config.should_handle( - self._instance_name, event.room_id - ) + assert self._is_event_writer if ratelimit: # We check if this is a room admin redacting an event so that we diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 55794c3057..9d5b1828df 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -804,9 +804,7 @@ class RoomCreationHandler(BaseHandler): # Always wait for room creation to progate before returning await self._replication.wait_for_stream_position( - self.hs.config.worker.events_shard_config.get_instance(room_id), - "events", - last_stream_id, + self.hs.config.worker.writers.events, "events", last_stream_id ) return result, last_stream_id @@ -1262,10 +1260,10 @@ class RoomShutdownHandler(object): # We now wait for the create room to come back in via replication so # that we can assume that all the joins/invites have propogated before # we try and auto join below. + # + # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.hs.config.worker.events_shard_config.get_instance(new_room_id), - "events", - stream_id, + self.hs.config.worker.writers.events, "events", stream_id ) else: new_room_id = None @@ -1295,9 +1293,7 @@ class RoomShutdownHandler(object): # Wait for leave to come in over replication before trying to forget. await self._replication.wait_for_stream_position( - self.hs.config.worker.events_shard_config.get_instance(room_id), - "events", - stream_id, + self.hs.config.worker.writers.events, "events", stream_id ) await self.room_member_handler.forget(target_requester.user, room_id) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index e5cb3c2d5c..a7962b0ada 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -82,6 +82,13 @@ class RoomMemberHandler(object): self._enable_lookup = hs.config.enable_3pid_lookup self.allow_per_room_profiles = self.config.allow_per_room_profiles + self._event_stream_writer_instance = hs.config.worker.writers.events + self._is_on_event_persistence_instance = ( + self._event_stream_writer_instance == hs.get_instance_name() + ) + if self._is_on_event_persistence_instance: + self.persist_event_storage = hs.get_storage().persistence + self._join_rate_limiter_local = Ratelimiter( clock=self.clock, rate_hz=hs.config.ratelimiting.rc_joins_local.per_second, diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 5c8be747e1..6b56315148 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -65,11 +65,10 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): self.federation_handler = hs.get_handlers().federation_handler @staticmethod - async def _serialize_payload(store, room_id, event_and_contexts, backfilled): + async def _serialize_payload(store, event_and_contexts, backfilled): """ Args: store - room_id (str) event_and_contexts (list[tuple[FrozenEvent, EventContext]]) backfilled (bool): Whether or not the events are the result of backfilling @@ -89,11 +88,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): } ) - payload = { - "events": event_payloads, - "backfilled": backfilled, - "room_id": room_id, - } + payload = {"events": event_payloads, "backfilled": backfilled} return payload @@ -101,7 +96,6 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): with Measure(self.clock, "repl_fed_send_events_parse"): content = parse_json_object_from_request(request) - room_id = content["room_id"] backfilled = content["backfilled"] event_payloads = content["events"] @@ -126,7 +120,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): logger.info("Got %d events from federation", len(event_and_contexts)) max_stream_id = await self.federation_handler.persist_events_and_notify( - room_id, event_and_contexts, backfilled + event_and_contexts, backfilled ) return 200, {"max_stream_id": max_stream_id} diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index b323841f73..1c303f3a46 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -109,7 +109,7 @@ class ReplicationCommandHandler: if isinstance(stream, (EventsStream, BackfillStream)): # Only add EventStream and BackfillStream as a source on the # instance in charge of event persistence. - if hs.get_instance_name() in hs.config.worker.writers.events: + if hs.config.worker.writers.events == hs.get_instance_name(): self._streams_to_replicate.append(stream) continue diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 3705618b4f..16c63ff4ec 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -19,7 +19,7 @@ from typing import List, Tuple, Type import attr -from ._base import Stream, StreamUpdateResult, Token +from ._base import Stream, StreamUpdateResult, Token, current_token_without_instance """Handling of the 'events' replication stream @@ -117,7 +117,7 @@ class EventsStream(Stream): self._store = hs.get_datastore() super().__init__( hs.get_instance_name(), - self._store._stream_id_gen.get_current_token_for_writer, + current_token_without_instance(self._store.get_current_events_token), self._update_function, ) diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py index c73d54fb67..0ac854aee2 100644 --- a/synapse/storage/databases/__init__.py +++ b/synapse/storage/databases/__init__.py @@ -68,7 +68,7 @@ class Databases(object): # If we're on a process that can persist events also # instantiate a `PersistEventsStore` - if hs.get_instance_name() in hs.config.worker.writers.events: + if hs.config.worker.writers.events == hs.get_instance_name(): persist_events = PersistEventsStore(hs, database, main) if "state" in database_config.databases: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 4c3c162acf..0b69aa6a94 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -438,7 +438,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ if stream_ordering <= self.stream_ordering_month_ago: - raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,)) + raise StoreError(400, "stream_ordering too old") sql = """ SELECT event_id FROM stream_ordering_to_exterm diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b94fe7ac17..b3d27a2ee7 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -97,7 +97,6 @@ class PersistEventsStore: self.store = main_data_store self.database_engine = db.engine self._clock = hs.get_clock() - self._instance_name = hs.get_instance_name() self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id @@ -109,7 +108,7 @@ class PersistEventsStore: # This should only exist on instances that are configured to write assert ( - hs.get_instance_name() in hs.config.worker.writers.events + hs.config.worker.writers.events == hs.get_instance_name() ), "Can only instantiate EventsStore on master" async def _persist_events_and_state_updates( @@ -801,7 +800,6 @@ class PersistEventsStore: table="events", values=[ { - "instance_name": self._instance_name, "stream_ordering": event.internal_metadata.stream_ordering, "topological_ordering": event.depth, "depth": event.depth, diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 17f5997b89..a7a73cc3d8 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -42,8 +42,7 @@ from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool -from synapse.storage.engines import PostgresEngine -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import Collection, get_domain_from_id from synapse.util.caches.descriptors import Cache, cached from synapse.util.iterutils import batch_iter @@ -79,54 +78,27 @@ class EventsWorkerStore(SQLBaseStore): def __init__(self, database: DatabasePool, db_conn, hs): super(EventsWorkerStore, self).__init__(database, db_conn, hs) - if isinstance(database.engine, PostgresEngine): - # If we're using Postgres than we can use `MultiWriterIdGenerator` - # regardless of whether this process writes to the streams or not. - self._stream_id_gen = MultiWriterIdGenerator( - db_conn=db_conn, - db=database, - instance_name=hs.get_instance_name(), - table="events", - instance_column="instance_name", - id_column="stream_ordering", - sequence_name="events_stream_seq", + if hs.config.worker.writers.events == hs.get_instance_name(): + # We are the process in charge of generating stream ids for events, + # so instantiate ID generators based on the database + self._stream_id_gen = StreamIdGenerator( + db_conn, "events", "stream_ordering", ) - self._backfill_id_gen = MultiWriterIdGenerator( - db_conn=db_conn, - db=database, - instance_name=hs.get_instance_name(), - table="events", - instance_column="instance_name", - id_column="stream_ordering", - sequence_name="events_backfill_stream_seq", - positive=False, + self._backfill_id_gen = StreamIdGenerator( + db_conn, + "events", + "stream_ordering", + step=-1, + extra_tables=[("ex_outlier_stream", "event_stream_ordering")], ) else: - # We shouldn't be running in worker mode with SQLite, but its useful - # to support it for unit tests. - # - # If this process is the writer than we need to use - # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets - # updated over replication. (Multiple writers are not supported for - # SQLite). - if hs.get_instance_name() in hs.config.worker.writers.events: - self._stream_id_gen = StreamIdGenerator( - db_conn, "events", "stream_ordering", - ) - self._backfill_id_gen = StreamIdGenerator( - db_conn, - "events", - "stream_ordering", - step=-1, - extra_tables=[("ex_outlier_stream", "event_stream_ordering")], - ) - else: - self._stream_id_gen = SlavedIdTracker( - db_conn, "events", "stream_ordering" - ) - self._backfill_id_gen = SlavedIdTracker( - db_conn, "events", "stream_ordering", step=-1 - ) + # Another process is in charge of persisting events and generating + # stream IDs: rely on the replication streams to let us know which + # IDs we can process. + self._stream_id_gen = SlavedIdTracker(db_conn, "events", "stream_ordering") + self._backfill_id_gen = SlavedIdTracker( + db_conn, "events", "stream_ordering", step=-1 + ) self._get_event_cache = Cache( "*getEvent*", diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql deleted file mode 100644 index 98ff76d709..0000000000 --- a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright 2020 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -ALTER TABLE events ADD COLUMN instance_name TEXT; diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres deleted file mode 100644 index 97c1e6a0c5..0000000000 --- a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright 2020 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE SEQUENCE IF NOT EXISTS events_stream_seq; - -SELECT setval('events_stream_seq', ( - SELECT COALESCE(MAX(stream_ordering), 1) FROM events -)); - -CREATE SEQUENCE IF NOT EXISTS events_backfill_stream_seq; - -SELECT setval('events_backfill_stream_seq', ( - SELECT COALESCE(-MIN(stream_ordering), 1) FROM events -)); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 8fd21c2bf8..9f3d23f0a5 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -231,12 +231,8 @@ class MultiWriterIdGenerator: # gaps should be relatively rare it's still worth doing the book keeping # that allows us to skip forwards when there are gapless runs of # positions. - # - # We start at 1 here as a) the first generated stream ID will be 2, and - # b) other parts of the code assume that stream IDs are strictly greater - # than 0. self._persisted_upto_position = ( - min(self._current_positions.values()) if self._current_positions else 1 + min(self._current_positions.values()) if self._current_positions else 0 ) self._known_persisted_positions = [] # type: List[int] @@ -366,7 +362,9 @@ class MultiWriterIdGenerator: equal to it have been successfully persisted. """ - return self.get_persisted_upto_position() + # Currently we don't support this operation, as it's not obvious how to + # condense the stream positions of multiple writers into a single int. + raise NotImplementedError() def get_current_token_for_writer(self, instance_name: str) -> int: """Returns the position of the given writer. -- cgit 1.5.1 From c619253db80c8d1c606dc40756dd3c9e3a55a9fb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 4 Sep 2020 06:54:56 -0400 Subject: Stop sub-classing object (#8249) --- changelog.d/8249.misc | 1 + contrib/cmdclient/http.py | 6 ++--- contrib/experiments/cursesio.py | 2 +- contrib/experiments/test_messaging.py | 4 ++-- scripts-dev/hash_history.py | 2 +- synapse/api/auth.py | 2 +- synapse/api/auth_blocking.py | 2 +- synapse/api/constants.py | 26 +++++++++++----------- synapse/api/errors.py | 2 +- synapse/api/filtering.py | 6 ++--- synapse/api/ratelimiting.py | 2 +- synapse/api/room_versions.py | 10 ++++----- synapse/api/urls.py | 2 +- synapse/app/_base.py | 4 ++-- synapse/app/generic_worker.py | 2 +- synapse/appservice/__init__.py | 6 ++--- synapse/appservice/scheduler.py | 8 +++---- synapse/config/_base.py | 4 ++-- synapse/config/cache.py | 2 +- synapse/config/key.py | 2 +- synapse/config/metrics.py | 2 +- synapse/config/ratelimiting.py | 4 ++-- synapse/config/room.py | 2 +- synapse/config/room_directory.py | 2 +- synapse/config/server.py | 2 +- synapse/crypto/context_factory.py | 8 +++---- synapse/crypto/keyring.py | 8 +++---- synapse/events/__init__.py | 2 +- synapse/events/builder.py | 4 ++-- synapse/events/spamcheck.py | 2 +- synapse/events/third_party_rules.py | 2 +- synapse/events/utils.py | 2 +- synapse/events/validator.py | 2 +- synapse/federation/federation_base.py | 2 +- synapse/federation/federation_server.py | 2 +- synapse/federation/persistence.py | 2 +- synapse/federation/send_queue.py | 4 ++-- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/per_destination_queue.py | 2 +- synapse/federation/sender/transaction_manager.py | 2 +- synapse/federation/transport/client.py | 2 +- synapse/federation/transport/server.py | 4 ++-- synapse/groups/attestations.py | 4 ++-- synapse/groups/groups_server.py | 2 +- synapse/handlers/__init__.py | 2 +- synapse/handlers/_base.py | 2 +- synapse/handlers/account_data.py | 2 +- synapse/handlers/account_validity.py | 2 +- synapse/handlers/acme.py | 2 +- synapse/handlers/acme_issuing_service.py | 2 +- synapse/handlers/admin.py | 2 +- synapse/handlers/appservice.py | 2 +- synapse/handlers/auth.py | 2 +- synapse/handlers/device.py | 2 +- synapse/handlers/devicemessage.py | 2 +- synapse/handlers/e2e_keys.py | 4 ++-- synapse/handlers/e2e_room_keys.py | 2 +- synapse/handlers/groups_local.py | 2 +- synapse/handlers/message.py | 4 ++-- synapse/handlers/pagination.py | 4 ++-- synapse/handlers/password_policy.py | 2 +- synapse/handlers/presence.py | 2 +- synapse/handlers/receipts.py | 2 +- synapse/handlers/room.py | 6 ++--- synapse/handlers/room_member.py | 2 +- synapse/handlers/saml_handler.py | 4 ++-- synapse/handlers/state_deltas.py | 2 +- synapse/handlers/sync.py | 4 ++-- synapse/handlers/typing.py | 2 +- synapse/http/client.py | 8 +++---- synapse/http/connectproxyclient.py | 2 +- synapse/http/federation/matrix_federation_agent.py | 6 ++--- synapse/http/federation/srv_resolver.py | 4 ++-- synapse/http/federation/well_known_resolver.py | 4 ++-- synapse/http/matrixfederationclient.py | 6 ++--- synapse/http/request_metrics.py | 2 +- synapse/http/server.py | 2 +- synapse/http/servlet.py | 2 +- synapse/logging/_structured.py | 6 ++--- synapse/logging/_terse_json.py | 4 ++-- synapse/logging/context.py | 6 ++--- synapse/logging/opentracing.py | 2 +- synapse/metrics/__init__.py | 16 ++++++------- synapse/metrics/background_process_metrics.py | 4 ++-- synapse/module_api/__init__.py | 2 +- synapse/notifier.py | 6 ++--- synapse/push/action_generator.py | 2 +- synapse/push/bulk_push_rule_evaluator.py | 4 ++-- synapse/push/emailpusher.py | 2 +- synapse/push/httppusher.py | 2 +- synapse/push/mailer.py | 2 +- synapse/push/push_rule_evaluator.py | 2 +- synapse/push/pusher.py | 2 +- synapse/replication/http/_base.py | 2 +- .../slave/storage/_slaved_id_tracker.py | 2 +- synapse/replication/tcp/protocol.py | 2 +- synapse/replication/tcp/resource.py | 2 +- synapse/replication/tcp/streams/_base.py | 2 +- synapse/replication/tcp/streams/events.py | 4 ++-- synapse/rest/client/transactions.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/rest/media/v1/_base.py | 4 ++-- synapse/rest/media/v1/filepath.py | 2 +- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/media_storage.py | 2 +- synapse/rest/media/v1/thumbnailer.py | 2 +- synapse/rest/well_known.py | 2 +- synapse/secrets.py | 2 +- synapse/server_notices/consent_server_notices.py | 2 +- .../resource_limits_server_notices.py | 2 +- synapse/server_notices/server_notices_manager.py | 2 +- synapse/server_notices/server_notices_sender.py | 2 +- .../server_notices/worker_server_notices_sender.py | 2 +- synapse/spam_checker_api/__init__.py | 2 +- synapse/state/__init__.py | 8 +++---- synapse/storage/__init__.py | 2 +- synapse/storage/background_updates.py | 4 ++-- synapse/storage/database.py | 4 ++-- synapse/storage/databases/__init__.py | 2 +- synapse/storage/databases/main/roommember.py | 2 +- synapse/storage/keys.py | 2 +- synapse/storage/persist_events.py | 4 ++-- synapse/storage/prepare_database.py | 2 +- synapse/storage/purge_events.py | 2 +- synapse/storage/relations.py | 6 ++--- synapse/storage/state.py | 4 ++-- synapse/storage/util/id_generators.py | 4 ++-- synapse/streams/config.py | 4 ++-- synapse/streams/events.py | 2 +- synapse/types.py | 2 +- synapse/util/__init__.py | 2 +- synapse/util/async_helpers.py | 8 +++---- synapse/util/caches/__init__.py | 2 +- synapse/util/caches/descriptors.py | 8 +++---- synapse/util/caches/dictionary_cache.py | 4 ++-- synapse/util/caches/expiringcache.py | 4 ++-- synapse/util/caches/lrucache.py | 4 ++-- synapse/util/caches/response_cache.py | 2 +- synapse/util/caches/treecache.py | 4 ++-- synapse/util/caches/ttlcache.py | 4 ++-- synapse/util/distributor.py | 4 ++-- synapse/util/file_consumer.py | 2 +- synapse/util/jsonobject.py | 2 +- synapse/util/metrics.py | 2 +- synapse/util/ratelimitutils.py | 4 ++-- synapse/util/retryutils.py | 2 +- synapse/util/wheel_timer.py | 4 ++-- tests/api/test_auth.py | 2 +- tests/crypto/test_keyring.py | 2 +- tests/federation/transport/test_server.py | 2 +- tests/handlers/test_auth.py | 2 +- tests/handlers/test_profile.py | 2 +- tests/http/__init__.py | 2 +- .../federation/test_matrix_federation_agent.py | 2 +- tests/logging/test_structured.py | 4 ++-- tests/push/test_email.py | 2 +- tests/rest/client/third_party_rules.py | 2 +- tests/rest/client/v1/utils.py | 2 +- tests/rest/media/v1/test_url_preview.py | 6 ++--- tests/server.py | 6 ++--- tests/state/test_v2.py | 4 ++-- tests/storage/test__base.py | 18 +++++++-------- tests/test_state.py | 4 ++-- tests/test_visibility.py | 2 +- tests/unittest.py | 2 +- tests/util/caches/test_descriptors.py | 20 ++++++++--------- tests/util/test_file_consumer.py | 4 ++-- tests/utils.py | 6 ++--- 168 files changed, 293 insertions(+), 292 deletions(-) create mode 100644 changelog.d/8249.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8249.misc b/changelog.d/8249.misc new file mode 100644 index 0000000000..6a42e8a4e6 --- /dev/null +++ b/changelog.d/8249.misc @@ -0,0 +1 @@ +Stop sub-classing from object. diff --git a/contrib/cmdclient/http.py b/contrib/cmdclient/http.py index e2534ee584..cd3260b27d 100644 --- a/contrib/cmdclient/http.py +++ b/contrib/cmdclient/http.py @@ -24,7 +24,7 @@ from twisted.web.client import Agent, readBody from twisted.web.http_headers import Headers -class HttpClient(object): +class HttpClient: """ Interface for talking json over http """ @@ -169,7 +169,7 @@ class TwistedHttpClient(HttpClient): return d -class _RawProducer(object): +class _RawProducer: def __init__(self, data): self.data = data self.body = data @@ -186,7 +186,7 @@ class _RawProducer(object): pass -class _JsonProducer(object): +class _JsonProducer: """ Used by the twisted http client to create the HTTP body from json """ diff --git a/contrib/experiments/cursesio.py b/contrib/experiments/cursesio.py index ffefe3bb39..15a22c3a0e 100644 --- a/contrib/experiments/cursesio.py +++ b/contrib/experiments/cursesio.py @@ -141,7 +141,7 @@ class CursesStdIO: curses.endwin() -class Callback(object): +class Callback: def __init__(self, stdio): self.stdio = stdio diff --git a/contrib/experiments/test_messaging.py b/contrib/experiments/test_messaging.py index a84ec4ecae..d4c35ff2fc 100644 --- a/contrib/experiments/test_messaging.py +++ b/contrib/experiments/test_messaging.py @@ -55,7 +55,7 @@ def excpetion_errback(failure): logging.exception(failure) -class InputOutput(object): +class InputOutput: """ This is responsible for basic I/O so that a user can interact with the example app. """ @@ -132,7 +132,7 @@ class IOLoggerHandler(logging.Handler): self.io.print_log(msg) -class Room(object): +class Room: """ Used to store (in memory) the current membership state of a room, and which home servers we should send PDUs associated with the room to. """ diff --git a/scripts-dev/hash_history.py b/scripts-dev/hash_history.py index bf3862a386..89acb52e6a 100644 --- a/scripts-dev/hash_history.py +++ b/scripts-dev/hash_history.py @@ -15,7 +15,7 @@ from synapse.storage.pdu import PduStore from synapse.storage.signatures import SignatureStore -class Store(object): +class Store: _get_pdu_tuples = PduStore.__dict__["_get_pdu_tuples"] _get_pdu_content_hashes_txn = SignatureStore.__dict__["_get_pdu_content_hashes_txn"] _get_prev_pdu_hashes_txn = SignatureStore.__dict__["_get_prev_pdu_hashes_txn"] diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 7aab764360..75388643ee 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -58,7 +58,7 @@ class _InvalidMacaroonException(Exception): pass -class Auth(object): +class Auth: """ FIXME: This class contains a mix of functions for authenticating users of our client-server API and authenticating events added to room graphs. diff --git a/synapse/api/auth_blocking.py b/synapse/api/auth_blocking.py index 49093bf181..d8fafd7cb8 100644 --- a/synapse/api/auth_blocking.py +++ b/synapse/api/auth_blocking.py @@ -22,7 +22,7 @@ from synapse.config.server import is_threepid_reserved logger = logging.getLogger(__name__) -class AuthBlocking(object): +class AuthBlocking: def __init__(self, hs): self.store = hs.get_datastore() diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 6a6d32c302..46013cde15 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -28,7 +28,7 @@ MAX_ALIAS_LENGTH = 255 MAX_USERID_LENGTH = 255 -class Membership(object): +class Membership: """Represents the membership states of a user in a room.""" @@ -40,7 +40,7 @@ class Membership(object): LIST = (INVITE, JOIN, KNOCK, LEAVE, BAN) -class PresenceState(object): +class PresenceState: """Represents the presence state of a user.""" OFFLINE = "offline" @@ -48,14 +48,14 @@ class PresenceState(object): ONLINE = "online" -class JoinRules(object): +class JoinRules: PUBLIC = "public" KNOCK = "knock" INVITE = "invite" PRIVATE = "private" -class LoginType(object): +class LoginType: PASSWORD = "m.login.password" EMAIL_IDENTITY = "m.login.email.identity" MSISDN = "m.login.msisdn" @@ -65,7 +65,7 @@ class LoginType(object): DUMMY = "m.login.dummy" -class EventTypes(object): +class EventTypes: Member = "m.room.member" Create = "m.room.create" Tombstone = "m.room.tombstone" @@ -96,17 +96,17 @@ class EventTypes(object): Presence = "m.presence" -class RejectedReason(object): +class RejectedReason: AUTH_ERROR = "auth_error" -class RoomCreationPreset(object): +class RoomCreationPreset: PRIVATE_CHAT = "private_chat" PUBLIC_CHAT = "public_chat" TRUSTED_PRIVATE_CHAT = "trusted_private_chat" -class ThirdPartyEntityKind(object): +class ThirdPartyEntityKind: USER = "user" LOCATION = "location" @@ -115,7 +115,7 @@ ServerNoticeMsgType = "m.server_notice" ServerNoticeLimitReached = "m.server_notice.usage_limit_reached" -class UserTypes(object): +class UserTypes: """Allows for user type specific behaviour. With the benefit of hindsight 'admin' and 'guest' users should also be UserTypes. Normal users are type None """ @@ -125,7 +125,7 @@ class UserTypes(object): ALL_USER_TYPES = (SUPPORT, BOT) -class RelationTypes(object): +class RelationTypes: """The types of relations known to this server. """ @@ -134,14 +134,14 @@ class RelationTypes(object): REFERENCE = "m.reference" -class LimitBlockingTypes(object): +class LimitBlockingTypes: """Reasons that a server may be blocked""" MONTHLY_ACTIVE_USER = "monthly_active_user" HS_DISABLED = "hs_disabled" -class EventContentFields(object): +class EventContentFields: """Fields found in events' content, regardless of type.""" # Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326 @@ -152,6 +152,6 @@ class EventContentFields(object): SELF_DESTRUCT_AFTER = "org.matrix.self_destruct_after" -class RoomEncryptionAlgorithms(object): +class RoomEncryptionAlgorithms: MEGOLM_V1_AES_SHA2 = "m.megolm.v1.aes-sha2" DEFAULT = MEGOLM_V1_AES_SHA2 diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 4888c0ec4d..94a9e58eae 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -31,7 +31,7 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) -class Codes(object): +class Codes: UNRECOGNIZED = "M_UNRECOGNIZED" UNAUTHORIZED = "M_UNAUTHORIZED" FORBIDDEN = "M_FORBIDDEN" diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index a8937d2595..2a2c9e6f13 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -130,7 +130,7 @@ def matrix_user_id_validator(user_id_str): return UserID.from_string(user_id_str) -class Filtering(object): +class Filtering: def __init__(self, hs): super(Filtering, self).__init__() self.store = hs.get_datastore() @@ -168,7 +168,7 @@ class Filtering(object): raise SynapseError(400, str(e)) -class FilterCollection(object): +class FilterCollection: def __init__(self, filter_json): self._filter_json = filter_json @@ -249,7 +249,7 @@ class FilterCollection(object): ) -class Filter(object): +class Filter: def __init__(self, filter_json): self.filter_json = filter_json diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index e62ae50ac2..5d9d5a228f 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -21,7 +21,7 @@ from synapse.types import Requester from synapse.util import Clock -class Ratelimiter(object): +class Ratelimiter: """ Ratelimit actions marked by arbitrary keys. diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index d7baf2bc39..f3ecbf36b6 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -18,7 +18,7 @@ from typing import Dict import attr -class EventFormatVersions(object): +class EventFormatVersions: """This is an internal enum for tracking the version of the event format, independently from the room version. """ @@ -35,20 +35,20 @@ KNOWN_EVENT_FORMAT_VERSIONS = { } -class StateResolutionVersions(object): +class StateResolutionVersions: """Enum to identify the state resolution algorithms""" V1 = 1 # room v1 state res V2 = 2 # MSC1442 state res: room v2 and later -class RoomDisposition(object): +class RoomDisposition: STABLE = "stable" UNSTABLE = "unstable" @attr.s(slots=True, frozen=True) -class RoomVersion(object): +class RoomVersion: """An object which describes the unique attributes of a room version.""" identifier = attr.ib() # str; the identifier for this version @@ -69,7 +69,7 @@ class RoomVersion(object): limit_notifications_power_levels = attr.ib(type=bool) -class RoomVersions(object): +class RoomVersions: V1 = RoomVersion( "1", RoomDisposition.STABLE, diff --git a/synapse/api/urls.py b/synapse/api/urls.py index bd03ebca5a..bbfccf955e 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -33,7 +33,7 @@ MEDIA_PREFIX = "/_matrix/media/r0" LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" -class ConsentURIBuilder(object): +class ConsentURIBuilder: def __init__(self, hs_config): """ Args: diff --git a/synapse/app/_base.py b/synapse/app/_base.py index a43dc5b2c9..fb476ddaf5 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -349,7 +349,7 @@ def install_dns_limiter(reactor, max_dns_requests_in_flight=100): reactor.installNameResolver(new_resolver) -class _LimitedHostnameResolver(object): +class _LimitedHostnameResolver: """Wraps a IHostnameResolver, limiting the number of in-flight DNS lookups. """ @@ -409,7 +409,7 @@ class _LimitedHostnameResolver(object): yield deferred -class _DeferredResolutionReceiver(object): +class _DeferredResolutionReceiver: """Wraps a IResolutionReceiver and simply resolves the given deferred when resolution is complete """ diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 739b013d4c..f985810e88 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -745,7 +745,7 @@ class GenericWorkerReplicationHandler(ReplicationDataHandler): self.send_handler.wake_destination(server) -class FederationSenderHandler(object): +class FederationSenderHandler: """Processes the fedration replication stream This class is only instantiate on the worker responsible for sending outbound diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 69a7182ef4..13ec1f71a6 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -27,12 +27,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class ApplicationServiceState(object): +class ApplicationServiceState: DOWN = "down" UP = "up" -class AppServiceTransaction(object): +class AppServiceTransaction: """Represents an application service transaction.""" def __init__(self, service, id, events): @@ -64,7 +64,7 @@ class AppServiceTransaction(object): await store.complete_appservice_txn(service=self.service, txn_id=self.id) -class ApplicationService(object): +class ApplicationService: """Defines an application service. This definition is mostly what is provided to the /register AS API. diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index d5204b1314..8eb8c6f51c 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -57,7 +57,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process logger = logging.getLogger(__name__) -class ApplicationServiceScheduler(object): +class ApplicationServiceScheduler: """ Public facing API for this module. Does the required DI to tie the components together. This also serves as the "event_pool", which in this case is a simple array. @@ -86,7 +86,7 @@ class ApplicationServiceScheduler(object): self.queuer.enqueue(service, event) -class _ServiceQueuer(object): +class _ServiceQueuer: """Queue of events waiting to be sent to appservices. Groups events into transactions per-appservice, and sends them on to the @@ -133,7 +133,7 @@ class _ServiceQueuer(object): self.requests_in_flight.discard(service.id) -class _TransactionController(object): +class _TransactionController: """Transaction manager. Builds AppServiceTransactions and runs their lifecycle. Also starts a Recoverer @@ -209,7 +209,7 @@ class _TransactionController(object): return state == ApplicationServiceState.UP or state is None -class _Recoverer(object): +class _Recoverer: """Manages retries and backoff for a DOWN appservice. We have one of these for each appservice which is currently considered DOWN. diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1417487427..ad5ab6ad62 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -88,7 +88,7 @@ def path_exists(file_path): return False -class Config(object): +class Config: """ A configuration section, containing configuration keys and values. @@ -283,7 +283,7 @@ def _create_mxc_to_http_filter(public_baseurl: str) -> Callable: return mxc_to_http_filter -class RootConfig(object): +class RootConfig: """ Holder of an application's configuration. diff --git a/synapse/config/cache.py b/synapse/config/cache.py index aff5b21ab2..8e03f14005 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -33,7 +33,7 @@ _DEFAULT_FACTOR_SIZE = 0.5 _DEFAULT_EVENT_CACHE_SIZE = "10K" -class CacheProperties(object): +class CacheProperties: def __init__(self): # The default factor size for all caches self.default_factor_size = float( diff --git a/synapse/config/key.py b/synapse/config/key.py index b529ea5da0..de964dff13 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -82,7 +82,7 @@ logger = logging.getLogger(__name__) @attr.s -class TrustedKeyServer(object): +class TrustedKeyServer: # string: name of the server. server_name = attr.ib() diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 6aad0d37c0..dfd27e1523 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -22,7 +22,7 @@ from ._base import Config, ConfigError @attr.s -class MetricsFlags(object): +class MetricsFlags: known_servers = attr.ib(default=False, validator=attr.validators.instance_of(bool)) @classmethod diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index b2c78ac40c..14b8836197 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -17,7 +17,7 @@ from typing import Dict from ._base import Config -class RateLimitConfig(object): +class RateLimitConfig: def __init__( self, config: Dict[str, float], @@ -27,7 +27,7 @@ class RateLimitConfig(object): self.burst_count = config.get("burst_count", defaults["burst_count"]) -class FederationRateLimitConfig(object): +class FederationRateLimitConfig: _items_and_default = { "window_size": 1000, "sleep_limit": 10, diff --git a/synapse/config/room.py b/synapse/config/room.py index 52cf0b62fc..692d7a1936 100644 --- a/synapse/config/room.py +++ b/synapse/config/room.py @@ -22,7 +22,7 @@ from ._base import Config, ConfigError logger = logging.Logger(__name__) -class RoomDefaultEncryptionTypes(object): +class RoomDefaultEncryptionTypes: """Possible values for the encryption_enabled_by_default_for_room_type config option""" ALL = "all" diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 7ac7699676..6de1f9d103 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -149,7 +149,7 @@ class RoomDirectoryConfig(Config): return False -class _RoomDirectoryRule(object): +class _RoomDirectoryRule: """Helper class to test whether a room directory action is allowed, like creating an alias or publishing a room. """ diff --git a/synapse/config/server.py b/synapse/config/server.py index 526a90b26a..e85c6a0840 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -424,7 +424,7 @@ class ServerConfig(Config): self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None)) @attr.s - class LimitRemoteRoomsConfig(object): + class LimitRemoteRoomsConfig: enabled = attr.ib( validator=attr.validators.instance_of(bool), default=False ) diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 777c0f00b1..2b03f5ac76 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -83,7 +83,7 @@ class ServerContextFactory(ContextFactory): @implementer(IPolicyForHTTPS) -class FederationPolicyForHTTPS(object): +class FederationPolicyForHTTPS: """Factory for Twisted SSLClientConnectionCreators that are used to make connections to remote servers for federation. @@ -152,7 +152,7 @@ class FederationPolicyForHTTPS(object): @implementer(IPolicyForHTTPS) -class RegularPolicyForHTTPS(object): +class RegularPolicyForHTTPS: """Factory for Twisted SSLClientConnectionCreators that are used to make connections to remote servers, for other than federation. @@ -189,7 +189,7 @@ def _context_info_cb(ssl_connection, where, ret): @implementer(IOpenSSLClientConnectionCreator) -class SSLClientConnectionCreator(object): +class SSLClientConnectionCreator: """Creates openssl connection objects for client connections. Replaces twisted.internet.ssl.ClientTLSOptions @@ -214,7 +214,7 @@ class SSLClientConnectionCreator(object): return connection -class ConnectionVerifier(object): +class ConnectionVerifier: """Set the SNI, and do cert verification This is a thing which is attached to the TLSMemoryBIOProtocol, and is called by diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 81c4b430b2..32c31b1cd1 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -57,7 +57,7 @@ logger = logging.getLogger(__name__) @attr.s(slots=True, cmp=False) -class VerifyJsonRequest(object): +class VerifyJsonRequest: """ A request to verify a JSON object. @@ -96,7 +96,7 @@ class KeyLookupError(ValueError): pass -class Keyring(object): +class Keyring: def __init__(self, hs, key_fetchers=None): self.clock = hs.get_clock() @@ -420,7 +420,7 @@ class Keyring(object): remaining_requests.difference_update(completed) -class KeyFetcher(object): +class KeyFetcher: async def get_keys(self, keys_to_fetch): """ Args: @@ -456,7 +456,7 @@ class StoreKeyFetcher(KeyFetcher): return keys -class BaseV2KeyFetcher(object): +class BaseV2KeyFetcher: def __init__(self, hs): self.store = hs.get_datastore() self.config = hs.get_config() diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 62ea44fa49..bf800a3852 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -96,7 +96,7 @@ class DefaultDictProperty(DictProperty): return instance._dict.get(self.key, self.default) -class _EventInternalMetadata(object): +class _EventInternalMetadata: __slots__ = ["_dict"] def __init__(self, internal_metadata_dict: JsonDict): diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 7878cd7044..b6c47be646 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -36,7 +36,7 @@ from synapse.util.stringutils import random_string @attr.s(slots=True, cmp=False, frozen=True) -class EventBuilder(object): +class EventBuilder: """A format independent event builder used to build up the event content before signing the event. @@ -164,7 +164,7 @@ class EventBuilder(object): ) -class EventBuilderFactory(object): +class EventBuilderFactory: def __init__(self, hs): self.clock = hs.get_clock() self.hostname = hs.hostname diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index a7cddac974..b0fc859a47 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -25,7 +25,7 @@ if MYPY: import synapse.server -class SpamChecker(object): +class SpamChecker: def __init__(self, hs: "synapse.server.HomeServer"): self.spam_checkers = [] # type: List[Any] diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 2956a64234..9d5310851c 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -18,7 +18,7 @@ from synapse.events.snapshot import EventContext from synapse.types import Requester -class ThirdPartyEventRules(object): +class ThirdPartyEventRules: """Allows server admins to provide a Python module implementing an extra set of rules to apply when processing events. diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 2d42e268c6..32c73d3413 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -322,7 +322,7 @@ def serialize_event( return d -class EventClientSerializer(object): +class EventClientSerializer: """Serializes events that are to be sent to clients. This is used for bundling extra information with any events to be sent to diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 5ce3874fba..9df35b54ba 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -20,7 +20,7 @@ from synapse.events.utils import validate_canonicaljson from synapse.types import EventID, RoomID, UserID -class EventValidator(object): +class EventValidator: def validate_new(self, event, config): """Validates the event has roughly the right format diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 420df2385f..38aa47963f 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -39,7 +39,7 @@ from synapse.types import JsonDict, get_domain_from_id logger = logging.getLogger(__name__) -class FederationBase(object): +class FederationBase: def __init__(self, hs): self.hs = hs diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 630f571cd4..218df884b0 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -785,7 +785,7 @@ def _acl_entry_matches(server_name: str, acl_entry: str) -> Match: return regex.match(server_name) -class FederationHandlerRegistry(object): +class FederationHandlerRegistry: """Allows classes to register themselves as handlers for a given EDU or query type for incoming federation traffic. """ diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py index de1fe7da38..079e2b2fe0 100644 --- a/synapse/federation/persistence.py +++ b/synapse/federation/persistence.py @@ -29,7 +29,7 @@ from synapse.types import JsonDict logger = logging.getLogger(__name__) -class TransactionActions(object): +class TransactionActions: """ Defines persistence actions that relate to handling Transactions. """ diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 4d65d4aeea..8e46957d15 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -46,7 +46,7 @@ from .units import Edu logger = logging.getLogger(__name__) -class FederationRemoteSendQueue(object): +class FederationRemoteSendQueue: """A drop in replacement for FederationSender""" def __init__(self, hs): @@ -365,7 +365,7 @@ class FederationRemoteSendQueue(object): ) -class BaseFederationRow(object): +class BaseFederationRow: """Base class for rows to be sent in the federation stream. Specifies how to identify, serialize and deserialize the different types. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 5276c1734f..552519e82c 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -56,7 +56,7 @@ sent_pdus_destination_dist_total = Counter( ) -class FederationSender(object): +class FederationSender: def __init__(self, hs: "synapse.server.HomeServer"): self.hs = hs self.server_name = hs.hostname diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index f1534d431d..defc228c23 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -53,7 +53,7 @@ sent_edus_by_type = Counter( ) -class PerDestinationQueue(object): +class PerDestinationQueue: """ Manages the per-destination transmission queues. diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 0ebc70d57d..c84072ab73 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -35,7 +35,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class TransactionManager(object): +class TransactionManager: """Helper class which handles building and sending transactions shared between PerDestinationQueue objects diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 9ea821dbb2..17a10f622e 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -30,7 +30,7 @@ from synapse.logging.utils import log_function logger = logging.getLogger(__name__) -class TransportLayerClient(object): +class TransportLayerClient: """Sends federation HTTP requests to other servers""" def __init__(self, hs): diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 5e111aa902..9325e0f857 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -100,7 +100,7 @@ class NoAuthenticationError(AuthenticationError): pass -class Authenticator(object): +class Authenticator: def __init__(self, hs: HomeServer): self._clock = hs.get_clock() self.keyring = hs.get_keyring() @@ -228,7 +228,7 @@ def _parse_auth_header(header_bytes): ) -class BaseFederationServlet(object): +class BaseFederationServlet: """Abstract base class for federation servlet classes. The servlet object should have a PATH attribute which takes the form of a regexp to diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index e674bf44a2..a86b3debc5 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -60,7 +60,7 @@ DEFAULT_ATTESTATION_JITTER = (0.9, 1.3) UPDATE_ATTESTATION_TIME_MS = 1 * 24 * 60 * 60 * 1000 -class GroupAttestationSigning(object): +class GroupAttestationSigning: """Creates and verifies group attestations. """ @@ -124,7 +124,7 @@ class GroupAttestationSigning(object): ) -class GroupAttestionRenewer(object): +class GroupAttestionRenewer: """Responsible for sending and receiving attestation updates. """ diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index 8cb922ddc7..1dd20ee4e1 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) # TODO: Flairs -class GroupsServerWorkerHandler(object): +class GroupsServerWorkerHandler: def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 2dd183018a..286f0054be 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -20,7 +20,7 @@ from .identity import IdentityHandler from .search import SearchHandler -class Handlers(object): +class Handlers: """ Deprecated. A collection of handlers. diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index ba2bf99800..0206320e96 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -25,7 +25,7 @@ from synapse.types import UserID logger = logging.getLogger(__name__) -class BaseHandler(object): +class BaseHandler: """ Common base class for the event handlers. """ diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index a8d3fbc6de..9112a0ab86 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -14,7 +14,7 @@ # limitations under the License. -class AccountDataEventSource(object): +class AccountDataEventSource: def __init__(self, hs): self.store = hs.get_datastore() diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index b865bf5b48..4caf6d591a 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -29,7 +29,7 @@ from synapse.util import stringutils logger = logging.getLogger(__name__) -class AccountValidityHandler(object): +class AccountValidityHandler: def __init__(self, hs): self.hs = hs self.config = hs.config diff --git a/synapse/handlers/acme.py b/synapse/handlers/acme.py index 7666d3abcd..8476256a59 100644 --- a/synapse/handlers/acme.py +++ b/synapse/handlers/acme.py @@ -34,7 +34,7 @@ solutions, please read https://github.com/matrix-org/synapse/blob/master/docs/AC --------------------------------------------------------------------------------""" -class AcmeHandler(object): +class AcmeHandler: def __init__(self, hs): self.hs = hs self.reactor = hs.get_reactor() diff --git a/synapse/handlers/acme_issuing_service.py b/synapse/handlers/acme_issuing_service.py index e1d4224e74..69650ff221 100644 --- a/synapse/handlers/acme_issuing_service.py +++ b/synapse/handlers/acme_issuing_service.py @@ -78,7 +78,7 @@ def create_issuing_service(reactor, acme_url, account_key_file, well_known_resou @attr.s @implementer(ICertificateStore) -class ErsatzStore(object): +class ErsatzStore: """ A store that only stores in memory. """ diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 506bb2b275..918d0e037c 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -197,7 +197,7 @@ class AdminHandler(BaseHandler): return writer.finished() -class ExfiltrationWriter(object): +class ExfiltrationWriter: """Interface used to specify how to write exported data. """ diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index c9044a5019..9d4e87dad6 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -34,7 +34,7 @@ logger = logging.getLogger(__name__) events_processed_counter = Counter("synapse_handlers_appservice_events_processed", "") -class ApplicationServicesHandler(object): +class ApplicationServicesHandler: def __init__(self, hs): self.store = hs.get_datastore() self.is_mine_id = hs.is_mine_id diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f0b0a4d76a..90189869cc 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1236,7 +1236,7 @@ class AuthHandler(BaseHandler): @attr.s -class MacaroonGenerator(object): +class MacaroonGenerator: hs = attr.ib() diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index ee4666337a..643d71a710 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -497,7 +497,7 @@ def _update_device_from_client_ips(device, client_ips): device.update({"last_seen_ts": ip.get("last_seen"), "last_seen_ip": ip.get("ip")}) -class DeviceListUpdater(object): +class DeviceListUpdater: "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index dcb4c82244..64ef7f63ab 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -31,7 +31,7 @@ from synapse.util.stringutils import random_string logger = logging.getLogger(__name__) -class DeviceMessageHandler(object): +class DeviceMessageHandler: def __init__(self, hs): """ Args: diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index dfd1c78549..d629c7c16c 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -43,7 +43,7 @@ from synapse.util.retryutils import NotRetryingDestination logger = logging.getLogger(__name__) -class E2eKeysHandler(object): +class E2eKeysHandler: def __init__(self, hs): self.store = hs.get_datastore() self.federation = hs.get_federation_client() @@ -1212,7 +1212,7 @@ class SignatureListItem: signature = attr.ib() -class SigningKeyEduUpdater(object): +class SigningKeyEduUpdater: """Handles incoming signing key updates from federation and updates the DB""" def __init__(self, hs, e2e_keys_handler): diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 0bb983dc28..f01b090772 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -29,7 +29,7 @@ from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) -class E2eRoomKeysHandler(object): +class E2eRoomKeysHandler: """ Implements an optional realtime backup mechanism for encrypted E2E megolm room keys. This gives a way for users to store and recover their megolm keys if they lose all diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 0e2656ccb3..44df567983 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -52,7 +52,7 @@ def _create_rerouter(func_name): return f -class GroupsLocalWorkerHandler(object): +class GroupsLocalWorkerHandler: def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 72bb638167..8a7b4916cd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -64,7 +64,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class MessageHandler(object): +class MessageHandler: """Contains some read only APIs to get state about a room """ @@ -361,7 +361,7 @@ class MessageHandler(object): _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY = 7 * 24 * 60 * 60 * 1000 -class EventCreationHandler(object): +class EventCreationHandler: def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 63d7edff87..34ed0e2921 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -37,7 +37,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class PurgeStatus(object): +class PurgeStatus: """Object tracking the status of a purge request This class contains information on the progress of a purge request, for @@ -65,7 +65,7 @@ class PurgeStatus(object): return {"status": PurgeStatus.STATUS_TEXT[self.status]} -class PaginationHandler(object): +class PaginationHandler: """Handles pagination and purge history requests. These are in the same handler due to the fact we need to block clients diff --git a/synapse/handlers/password_policy.py b/synapse/handlers/password_policy.py index d06b110269..88e2f87200 100644 --- a/synapse/handlers/password_policy.py +++ b/synapse/handlers/password_policy.py @@ -22,7 +22,7 @@ from synapse.api.errors import Codes, PasswordRefusedError logger = logging.getLogger(__name__) -class PasswordPolicyHandler(object): +class PasswordPolicyHandler: def __init__(self, hs): self.policy = hs.config.password_policy self.enabled = hs.config.password_policy_enabled diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 1846068150..91a3aec1cc 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1010,7 +1010,7 @@ def format_user_presence_state(state, now, include_user_id=True): return content -class PresenceEventSource(object): +class PresenceEventSource: def __init__(self, hs): # We can't call get_presence_handler here because there's a cycle: # diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f922d8a545..2cc6c2eb68 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -123,7 +123,7 @@ class ReceiptsHandler(BaseHandler): await self.federation.send_read_receipt(receipt) -class ReceiptEventSource(object): +class ReceiptEventSource: def __init__(self, hs): self.store = hs.get_datastore() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9d5b1828df..a29305f655 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -974,7 +974,7 @@ class RoomCreationHandler(BaseHandler): raise StoreError(500, "Couldn't generate a room ID.") -class RoomContextHandler(object): +class RoomContextHandler: def __init__(self, hs: "HomeServer"): self.hs = hs self.store = hs.get_datastore() @@ -1084,7 +1084,7 @@ class RoomContextHandler(object): return results -class RoomEventSource(object): +class RoomEventSource: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -1146,7 +1146,7 @@ class RoomEventSource(object): return self.store.get_room_events_max_id(room_id) -class RoomShutdownHandler(object): +class RoomShutdownHandler: DEFAULT_MESSAGE = ( "Sharing illegal content on this server is not permitted and rooms in" diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a7962b0ada..32b7e323fa 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -51,7 +51,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class RoomMemberHandler(object): +class RoomMemberHandler: # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level # API that takes ID strings and returns pagination chunks. These concerns diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index b426199aa6..66b063f991 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -360,12 +360,12 @@ MXID_MAPPER_MAP = { @attr.s -class SamlConfig(object): +class SamlConfig: mxid_source_attribute = attr.ib() mxid_mapper = attr.ib() -class DefaultSamlMappingProvider(object): +class DefaultSamlMappingProvider: __version__ = "0.0.1" def __init__(self, parsed_config: SamlConfig, module_api: ModuleApi): diff --git a/synapse/handlers/state_deltas.py b/synapse/handlers/state_deltas.py index 8590c1eff4..7a4ae0727a 100644 --- a/synapse/handlers/state_deltas.py +++ b/synapse/handlers/state_deltas.py @@ -18,7 +18,7 @@ import logging logger = logging.getLogger(__name__) -class StateDeltasHandler(object): +class StateDeltasHandler: def __init__(self, hs): self.store = hs.get_datastore() diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8728403e62..e2ddb628ff 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -246,7 +246,7 @@ class SyncResult: __bool__ = __nonzero__ # python3 -class SyncHandler(object): +class SyncHandler: def __init__(self, hs: "HomeServer"): self.hs_config = hs.config self.store = hs.get_datastore() @@ -2075,7 +2075,7 @@ class SyncResultBuilder: @attr.s -class RoomSyncResultBuilder(object): +class RoomSyncResultBuilder: """Stores information needed to create either a `JoinedSyncResult` or `ArchivedSyncResult`. diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 1d828bd7be..3cbfc2d780 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -412,7 +412,7 @@ class TypingWriterHandler(FollowerTypingHandler): raise Exception("Typing writer instance got typing info over replication") -class TypingNotificationEventSource(object): +class TypingNotificationEventSource: def __init__(self, hs): self.hs = hs self.clock = hs.get_clock() diff --git a/synapse/http/client.py b/synapse/http/client.py index dad01a8e56..13fcab3378 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -86,7 +86,7 @@ def _make_scheduler(reactor): return _scheduler -class IPBlacklistingResolver(object): +class IPBlacklistingResolver: """ A proxy for reactor.nameResolver which only produces non-blacklisted IP addresses, preventing DNS rebinding attacks on URL preview. @@ -133,7 +133,7 @@ class IPBlacklistingResolver(object): r.resolutionComplete() @provider(IResolutionReceiver) - class EndpointReceiver(object): + class EndpointReceiver: @staticmethod def resolutionBegan(resolutionInProgress): pass @@ -192,7 +192,7 @@ class BlacklistingAgentWrapper(Agent): ) -class SimpleHttpClient(object): +class SimpleHttpClient: """ A simple, no-frills HTTP client with methods that wrap up common ways of using HTTP in Matrix @@ -244,7 +244,7 @@ class SimpleHttpClient(object): ) @implementer(IReactorPluggableNameResolver) - class Reactor(object): + class Reactor: def __getattr__(_self, attr): if attr == "nameResolver": return nameResolver diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py index be7b2ceb8e..856e28454f 100644 --- a/synapse/http/connectproxyclient.py +++ b/synapse/http/connectproxyclient.py @@ -31,7 +31,7 @@ class ProxyConnectError(ConnectError): @implementer(IStreamClientEndpoint) -class HTTPConnectProxyEndpoint(object): +class HTTPConnectProxyEndpoint: """An Endpoint implementation which will send a CONNECT request to an http proxy Wraps an existing HostnameEndpoint for the proxy. diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 782d39d4ca..83d6196d4a 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) @implementer(IAgent) -class MatrixFederationAgent(object): +class MatrixFederationAgent: """An Agent-like thing which provides a `request` method which correctly handles resolving matrix server names when using matrix://. Handles standard https URIs as normal. @@ -175,7 +175,7 @@ class MatrixFederationAgent(object): @implementer(IAgentEndpointFactory) -class MatrixHostnameEndpointFactory(object): +class MatrixHostnameEndpointFactory: """Factory for MatrixHostnameEndpoint for parsing to an Agent. """ @@ -198,7 +198,7 @@ class MatrixHostnameEndpointFactory(object): @implementer(IStreamClientEndpoint) -class MatrixHostnameEndpoint(object): +class MatrixHostnameEndpoint: """An endpoint that resolves matrix:// URLs using Matrix server name resolution (i.e. via SRV). Does not check for well-known delegation. diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index 2ede90a9b1..d9620032d2 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -33,7 +33,7 @@ SERVER_CACHE = {} @attr.s(slots=True, frozen=True) -class Server(object): +class Server: """ Our record of an individual server which can be tried to reach a destination. @@ -96,7 +96,7 @@ def _sort_server_list(server_list): return results -class SrvResolver(object): +class SrvResolver: """Interface to the dns client to do SRV lookups, with result caching. The default resolver in twisted.names doesn't do any caching (it has a CacheResolver, diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index cdb6bec56e..e6f067ca29 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -71,11 +71,11 @@ _had_valid_well_known_cache = TTLCache("had-valid-well-known") @attr.s(slots=True, frozen=True) -class WellKnownLookupResult(object): +class WellKnownLookupResult: delegated_server = attr.ib() -class WellKnownResolver(object): +class WellKnownResolver: """Handles well-known lookups for matrix servers. """ diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 738be43f46..775fad3be4 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -76,7 +76,7 @@ _next_id = 1 @attr.s(frozen=True) -class MatrixFederationRequest(object): +class MatrixFederationRequest: method = attr.ib() """HTTP method :type: str @@ -203,7 +203,7 @@ async def _handle_json_response( return body -class MatrixFederationHttpClient(object): +class MatrixFederationHttpClient: """HTTP client used to talk to other homeservers over the federation protocol. Send client certificates and signs requests. @@ -226,7 +226,7 @@ class MatrixFederationHttpClient(object): ) @implementer(IReactorPluggableNameResolver) - class Reactor(object): + class Reactor: def __getattr__(_self, attr): if attr == "nameResolver": return nameResolver diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index b58ae3d9db..cd94e789e8 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -145,7 +145,7 @@ LaterGauge( ) -class RequestMetrics(object): +class RequestMetrics: def start(self, time_sec, name, method): self.start = time_sec self.start_context = current_context() diff --git a/synapse/http/server.py b/synapse/http/server.py index 8d791bd2ca..996a31a9ec 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -174,7 +174,7 @@ def wrap_async_request_handler(h): return preserve_fn(wrapped_async_request_handler) -class HttpServer(object): +class HttpServer: """ Interface for registering callbacks on a HTTP server """ diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 53acba56cb..fd90ba7828 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -256,7 +256,7 @@ def assert_params_in_dict(body, required): raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) -class RestServlet(object): +class RestServlet: """ A Synapse REST Servlet. diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 7372450b45..144506c8f2 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -55,7 +55,7 @@ def stdlib_log_level_to_twisted(level: str) -> LogLevel: @attr.s @implementer(ILogObserver) -class LogContextObserver(object): +class LogContextObserver: """ An ILogObserver which adds Synapse-specific log context information. @@ -169,7 +169,7 @@ class OutputPipeType(Values): @attr.s -class DrainConfiguration(object): +class DrainConfiguration: name = attr.ib() type = attr.ib() location = attr.ib() @@ -177,7 +177,7 @@ class DrainConfiguration(object): @attr.s -class NetworkJSONTerseOptions(object): +class NetworkJSONTerseOptions: maximum_buffer = attr.ib(type=int) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index c0b9384189..1b8916cfa2 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -152,7 +152,7 @@ def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogOb @attr.s @implementer(IPushProducer) -class LogProducer(object): +class LogProducer: """ An IPushProducer that writes logs from its buffer to its transport when it is resumed. @@ -190,7 +190,7 @@ class LogProducer(object): @attr.s @implementer(ILogObserver) -class TerseJSONToTCPLogObserver(object): +class TerseJSONToTCPLogObserver: """ An IObserver that writes JSON logs to a TCP target. diff --git a/synapse/logging/context.py b/synapse/logging/context.py index cbeeb870cb..22598e02d2 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -74,7 +74,7 @@ except Exception: get_thread_id = threading.get_ident -class ContextResourceUsage(object): +class ContextResourceUsage: """Object for tracking the resources used by a log context Attributes: @@ -179,7 +179,7 @@ class ContextResourceUsage(object): LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"] -class _Sentinel(object): +class _Sentinel: """Sentinel to represent the root context""" __slots__ = ["previous_context", "finished", "request", "scope", "tag"] @@ -226,7 +226,7 @@ class _Sentinel(object): SENTINEL_CONTEXT = _Sentinel() -class LoggingContext(object): +class LoggingContext: """Additional context for log formatting. Contexts are scoped within a "with" block. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d39ac62168..7df0aa197d 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -185,7 +185,7 @@ if TYPE_CHECKING: # Helper class -class _DummyTagNames(object): +class _DummyTagNames: """wrapper of opentracings tags. We need to have them if we want to reference them without opentracing around. Clearly they should never actually show up in a trace. `set_tags` overwrites diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 6035672698..2643380d9e 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -51,7 +51,7 @@ all_gauges = {} # type: Dict[str, Union[LaterGauge, InFlightGauge, BucketCollec HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") -class RegistryProxy(object): +class RegistryProxy: @staticmethod def collect(): for metric in REGISTRY.collect(): @@ -60,7 +60,7 @@ class RegistryProxy(object): @attr.s(hash=True) -class LaterGauge(object): +class LaterGauge: name = attr.ib(type=str) desc = attr.ib(type=str) @@ -100,7 +100,7 @@ class LaterGauge(object): all_gauges[self.name] = self -class InFlightGauge(object): +class InFlightGauge: """Tracks number of things (e.g. requests, Measure blocks, etc) in flight at any given time. @@ -206,7 +206,7 @@ class InFlightGauge(object): @attr.s(hash=True) -class BucketCollector(object): +class BucketCollector: """ Like a Histogram, but allows buckets to be point-in-time instead of incrementally added to. @@ -269,7 +269,7 @@ class BucketCollector(object): # -class CPUMetrics(object): +class CPUMetrics: def __init__(self): ticks_per_sec = 100 try: @@ -329,7 +329,7 @@ gc_time = Histogram( ) -class GCCounts(object): +class GCCounts: def collect(self): cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): @@ -347,7 +347,7 @@ if not running_on_pypy: # -class PyPyGCStats(object): +class PyPyGCStats: def collect(self): # @stats is a pretty-printer object with __str__() returning a nice table, @@ -482,7 +482,7 @@ build_info.labels( last_ticked = time.time() -class ReactorLastSeenMetric(object): +class ReactorLastSeenMetric: def collect(self): cm = GaugeMetricFamily( "python_twisted_reactor_last_seen", diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 4cd7932e5b..5b73463504 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -105,7 +105,7 @@ _background_processes_active_since_last_scrape = set() # type: Set[_BackgroundP _bg_metrics_lock = threading.Lock() -class _Collector(object): +class _Collector: """A custom metrics collector for the background process metrics. Ensures that all of the metrics are up-to-date with any in-flight processes @@ -140,7 +140,7 @@ class _Collector(object): REGISTRY.register(_Collector()) -class _BackgroundProcess(object): +class _BackgroundProcess: def __init__(self, desc, ctx): self.desc = desc self._context = ctx diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index ae0e359a77..fcbd5378c4 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -31,7 +31,7 @@ __all__ = ["errors", "make_deferred_yieldable", "run_in_background", "ModuleApi" logger = logging.getLogger(__name__) -class ModuleApi(object): +class ModuleApi: """A proxy object that gets passed to various plugin modules so they can register new users etc if necessary. """ diff --git a/synapse/notifier.py b/synapse/notifier.py index dfb096e589..b7f4041306 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -68,7 +68,7 @@ def count(func: Callable[[T], bool], it: Iterable[T]) -> int: return n -class _NotificationListener(object): +class _NotificationListener: """ This represents a single client connection to the events stream. The events stream handler will have yielded to the deferred, so to notify the handler it is sufficient to resolve the deferred. @@ -80,7 +80,7 @@ class _NotificationListener(object): self.deferred = deferred -class _NotifierUserStream(object): +class _NotifierUserStream: """This represents a user connected to the event stream. It tracks the most recent stream token for that user. At a given point a user may have a number of streams listening for @@ -168,7 +168,7 @@ class EventStreamResult(namedtuple("EventStreamResult", ("events", "tokens"))): __bool__ = __nonzero__ # python3 -class Notifier(object): +class Notifier: """ This class is responsible for notifying any listeners when there are new events available for it. diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 0d23142653..fabc9ba126 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -22,7 +22,7 @@ from .bulk_push_rule_evaluator import BulkPushRuleEvaluator logger = logging.getLogger(__name__) -class ActionGenerator(object): +class ActionGenerator: def __init__(self, hs): self.hs = hs self.clock = hs.get_clock() diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index e7fa02b78b..1bb8e346b9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -95,7 +95,7 @@ def _should_count_as_unread(event: EventBase, context: EventContext) -> bool: return False -class BulkPushRuleEvaluator(object): +class BulkPushRuleEvaluator: """Calculates the outcome of push rules for an event for all users in the room at once. """ @@ -263,7 +263,7 @@ def _condition_checker(evaluator, conditions, uid, display_name, cache): return True -class RulesForRoom(object): +class RulesForRoom: """Caches push rules for users in a room. This efficiently handles users joining/leaving the room by not invalidating diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index 568c13eaea..b7ea4438e0 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -45,7 +45,7 @@ THROTTLE_RESET_AFTER_MS = 12 * 60 * 60 * 1000 INCLUDE_ALL_UNREAD_NOTIFS = False -class EmailPusher(object): +class EmailPusher: """ A pusher that sends email notifications about events (approximately) when they happen. diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 4c469efb20..f21fa9b659 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -49,7 +49,7 @@ http_badges_failed_counter = Counter( ) -class HttpPusher(object): +class HttpPusher: INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes MAX_BACKOFF_SEC = 60 * 60 diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index c38e037281..6c57854018 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -92,7 +92,7 @@ ALLOWED_ATTRS = { # ALLOWED_SCHEMES = ["http", "https", "ftp", "mailto"] -class Mailer(object): +class Mailer: def __init__(self, hs, app_name, template_html, template_text): self.hs = hs self.template_html = template_html diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 2d79ada189..709ace01e5 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -105,7 +105,7 @@ def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]: return tweaks -class PushRuleEvaluatorForEvent(object): +class PushRuleEvaluatorForEvent: def __init__( self, event: EventBase, diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index f626797133..2a52e226e3 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -23,7 +23,7 @@ from .httppusher import HttpPusher logger = logging.getLogger(__name__) -class PusherFactory(object): +class PusherFactory: def __init__(self, hs): self.hs = hs self.config = hs.config diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 6a28c2db9d..ba16f22c91 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -33,7 +33,7 @@ from synapse.util.stringutils import random_string logger = logging.getLogger(__name__) -class ReplicationEndpoint(object): +class ReplicationEndpoint: """Helper base class for defining new replication HTTP endpoints. This creates an endpoint under `/_synapse/replication/:NAME/:PATH_ARGS..` diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py index 047f2c50f7..eb74903d68 100644 --- a/synapse/replication/slave/storage/_slaved_id_tracker.py +++ b/synapse/replication/slave/storage/_slaved_id_tracker.py @@ -16,7 +16,7 @@ from synapse.storage.util.id_generators import _load_current_id -class SlavedIdTracker(object): +class SlavedIdTracker: def __init__(self, db_conn, table, column, extra_tables=[], step=1): self.step = step self._current = _load_current_id(db_conn, table, column, step) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 0350923898..0b0d204e64 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -113,7 +113,7 @@ PING_TIMEOUT_MULTIPLIER = 5 PING_TIMEOUT_MS = PING_TIME * PING_TIMEOUT_MULTIPLIER -class ConnectionStates(object): +class ConnectionStates: CONNECTING = "connecting" ESTABLISHED = "established" PAUSED = "paused" diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 41569305df..04d894fb3d 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -58,7 +58,7 @@ class ReplicationStreamProtocolFactory(Factory): ) -class ReplicationStreamer(object): +class ReplicationStreamer: """Handles replication connections. This needs to be poked when new replication data may be available. When new diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 8c3caf30c9..682d47f402 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -79,7 +79,7 @@ StreamUpdateResult = Tuple[List[Tuple[Token, StreamRow]], Token, bool] UpdateFunction = Callable[[str, Token, Token, int], Awaitable[StreamUpdateResult]] -class Stream(object): +class Stream: """Base class for the streams. Provides a `get_updates()` function that returns new updates since the last diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 16c63ff4ec..f929fc3954 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -49,14 +49,14 @@ data part are: @attr.s(slots=True, frozen=True) -class EventsStreamRow(object): +class EventsStreamRow: """A parsed row from the events replication stream""" type = attr.ib() # str: the TypeId of one of the *EventsStreamRows data = attr.ib() # BaseEventsStreamRow -class BaseEventsStreamRow(object): +class BaseEventsStreamRow: """Base class for rows to be sent in the events stream. Specifies how to identify, serialize and deserialize the different types. diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py index 6da71dc46f..7be5c0fb88 100644 --- a/synapse/rest/client/transactions.py +++ b/synapse/rest/client/transactions.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) CLEANUP_PERIOD_MS = 1000 * 60 * 30 # 30 mins -class HttpTransactionCache(object): +class HttpTransactionCache: def __init__(self, hs): self.hs = hs self.auth = self.hs.get_auth() diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 51372cdb5e..b6b90a8b30 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -658,7 +658,7 @@ class RegisterRestServlet(RestServlet): (object) params: registration parameters, from which we pull device_id, initial_device_name and inhibit_login Returns: - (object) dictionary for response from /register + dictionary for response from /register """ result = {"user_id": user_id, "home_server": self.hs.hostname} if not params.get("inhibit_login", False): diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 20ddb9550b..6568e61829 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -235,7 +235,7 @@ async def respond_with_responder( finish_request(request) -class Responder(object): +class Responder: """Represents a response that can be streamed to the requester. Responder is a context manager which *must* be used, so that any resources @@ -260,7 +260,7 @@ class Responder(object): pass -class FileInfo(object): +class FileInfo: """Details about a requested/uploaded file. Attributes: diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index e25c382c9c..d2826374a7 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -33,7 +33,7 @@ def _wrap_in_base_path(func): return _wrapped -class MediaFilePaths(object): +class MediaFilePaths: """Describes where files are stored on disk. Most of the functions have a `*_rel` variant which returns a file path that diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6fb4039e98..9a1b7779f7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -62,7 +62,7 @@ logger = logging.getLogger(__name__) UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 -class MediaRepository(object): +class MediaRepository: def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index ab1fa705bf..3a352b5631 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -34,7 +34,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class MediaStorage(object): +class MediaStorage: """Responsible for storing/fetching files from local sources. Args: diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 7126997134..d681bf7bf0 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -31,7 +31,7 @@ EXIF_TRANSPOSE_MAPPINGS = { } -class Thumbnailer(object): +class Thumbnailer: FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"} diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py index e15e13b756..f591cc6c5c 100644 --- a/synapse/rest/well_known.py +++ b/synapse/rest/well_known.py @@ -23,7 +23,7 @@ from synapse.util import json_encoder logger = logging.getLogger(__name__) -class WellKnownBuilder(object): +class WellKnownBuilder: """Utility to construct the well-known response Args: diff --git a/synapse/secrets.py b/synapse/secrets.py index ff86950a54..fb6d90a3b7 100644 --- a/synapse/secrets.py +++ b/synapse/secrets.py @@ -37,7 +37,7 @@ else: import binascii import os - class Secrets(object): + class Secrets: def token_bytes(self, nbytes=32): return os.urandom(nbytes) diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 089cfef0b3..3673e7f47e 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -23,7 +23,7 @@ from synapse.types import get_localpart_from_id logger = logging.getLogger(__name__) -class ConsentServerNotices(object): +class ConsentServerNotices: """Keeps track of whether we need to send users server_notices about privacy policy consent, and sends one if we do. """ diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py index c2faef6eab..2258d306d9 100644 --- a/synapse/server_notices/resource_limits_server_notices.py +++ b/synapse/server_notices/resource_limits_server_notices.py @@ -27,7 +27,7 @@ from synapse.server_notices.server_notices_manager import SERVER_NOTICE_ROOM_TAG logger = logging.getLogger(__name__) -class ResourceLimitsServerNotices(object): +class ResourceLimitsServerNotices: """ Keeps track of whether the server has reached it's resource limit and ensures that the client is kept up to date. """ diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index ed96aa8571..0422d4c7ce 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) SERVER_NOTICE_ROOM_TAG = "m.server_notice" -class ServerNoticesManager(object): +class ServerNoticesManager: def __init__(self, hs): """ diff --git a/synapse/server_notices/server_notices_sender.py b/synapse/server_notices/server_notices_sender.py index a754f75db4..6870b67ca0 100644 --- a/synapse/server_notices/server_notices_sender.py +++ b/synapse/server_notices/server_notices_sender.py @@ -20,7 +20,7 @@ from synapse.server_notices.resource_limits_server_notices import ( ) -class ServerNoticesSender(object): +class ServerNoticesSender: """A centralised place which sends server notices automatically when Certain Events take place """ diff --git a/synapse/server_notices/worker_server_notices_sender.py b/synapse/server_notices/worker_server_notices_sender.py index e9390b19da..9273e61895 100644 --- a/synapse/server_notices/worker_server_notices_sender.py +++ b/synapse/server_notices/worker_server_notices_sender.py @@ -14,7 +14,7 @@ # limitations under the License. -class WorkerServerNoticesSender(object): +class WorkerServerNoticesSender: """Stub impl of ServerNoticesSender which does nothing""" def __init__(self, hs): diff --git a/synapse/spam_checker_api/__init__.py b/synapse/spam_checker_api/__init__.py index 9be92e2565..395ac5ab02 100644 --- a/synapse/spam_checker_api/__init__.py +++ b/synapse/spam_checker_api/__init__.py @@ -36,7 +36,7 @@ class RegistrationBehaviour(Enum): DENY = "deny" -class SpamCheckerApi(object): +class SpamCheckerApi: """A proxy object that gets passed to spam checkers so they can get access to rooms and other relevant information. """ diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9bf2ec368f..c7e3015b5d 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -77,7 +77,7 @@ def _gen_state_id(): return s -class _StateCacheEntry(object): +class _StateCacheEntry: __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] def __init__( @@ -113,7 +113,7 @@ class _StateCacheEntry(object): return len(self.state) -class StateHandler(object): +class StateHandler: """Fetches bits of state from the stores, and does state resolution where necessary """ @@ -462,7 +462,7 @@ class StateHandler(object): return {key: state_map[ev_id] for key, ev_id in new_state.items()} -class StateResolutionHandler(object): +class StateResolutionHandler: """Responsible for doing state conflict resolution. Note that the storage layer depends on this handler, so all functions must @@ -679,7 +679,7 @@ def resolve_events_with_store( @attr.s -class StateResolutionStore(object): +class StateResolutionStore: """Interface that allows state resolution algorithms to access the database in well defined way. diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 5ef3853559..8e5d78f6f7 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -37,7 +37,7 @@ from synapse.storage.state import StateGroupStorage __all__ = ["DataStores", "DataStore"] -class Storage(object): +class Storage: """The high level interfaces for talking to various storage layers. """ diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 67a89cd51a..810721ebe9 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -24,7 +24,7 @@ from . import engines logger = logging.getLogger(__name__) -class BackgroundUpdatePerformance(object): +class BackgroundUpdatePerformance: """Tracks the how long a background update is taking to update its items""" def __init__(self, name): @@ -71,7 +71,7 @@ class BackgroundUpdatePerformance(object): return float(self.total_item_count) / float(self.total_duration_ms) -class BackgroundUpdater(object): +class BackgroundUpdater: """ Background updates are updates to the database that run in the background. Each update processes a batch of data at once. We attempt to limit the impact of each update by monitoring how long each batch takes to diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 78ca6d8346..8be943f589 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -248,7 +248,7 @@ class LoggingTransaction: self.txn.close() -class PerformanceCounters(object): +class PerformanceCounters: def __init__(self): self.current_counters = {} self.previous_counters = {} @@ -286,7 +286,7 @@ class PerformanceCounters(object): R = TypeVar("R") -class DatabasePool(object): +class DatabasePool: """Wraps a single physical database and connection pool. A single database may be used by multiple data stores. diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py index 0ac854aee2..7f08bd8285 100644 --- a/synapse/storage/databases/__init__.py +++ b/synapse/storage/databases/__init__.py @@ -24,7 +24,7 @@ from synapse.storage.prepare_database import prepare_database logger = logging.getLogger(__name__) -class Databases(object): +class Databases: """The various databases. These are low level interfaces to physical databases. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index c46f5cd524..91a8b43da3 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -999,7 +999,7 @@ class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): await self.db_pool.runInteraction("forget_membership", f) -class _JoinedHostsCache(object): +class _JoinedHostsCache: """Cache for joined hosts in a room that is optimised to handle updates via state deltas. """ diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 4769b21529..afd10f7bae 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -22,6 +22,6 @@ logger = logging.getLogger(__name__) @attr.s(slots=True, frozen=True) -class FetchKeyResult(object): +class FetchKeyResult: verify_key = attr.ib() # VerifyKey: the key itself valid_until_ts = attr.ib() # int: how long we can use this key for diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index f15b95e633..dbaeef91dd 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -69,7 +69,7 @@ stale_forward_extremities_counter = Histogram( ) -class _EventPeristenceQueue(object): +class _EventPeristenceQueue: """Queues up events so that they can be persisted in bulk with only one concurrent transaction per room. """ @@ -172,7 +172,7 @@ class _EventPeristenceQueue(object): pass -class EventsPersistenceStorage(object): +class EventsPersistenceStorage: """High level interface for handling persisting newly received events. Takes care of batching up events by room, and calculating the necessary diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1c5f305132..964d8d9eb8 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -569,7 +569,7 @@ def _get_or_create_schema_state(txn, database_engine): @attr.s() -class _DirectoryListing(object): +class _DirectoryListing: """Helper class to store schema file name and the absolute path to it. diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py index 79d9f06e2e..bfa0a9fd06 100644 --- a/synapse/storage/purge_events.py +++ b/synapse/storage/purge_events.py @@ -20,7 +20,7 @@ from typing import Set logger = logging.getLogger(__name__) -class PurgeEventsStorage(object): +class PurgeEventsStorage: """High level interface for purging rooms and event history. """ diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index d471ec9860..d30e3f11e7 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) @attr.s -class PaginationChunk(object): +class PaginationChunk: """Returned by relation pagination APIs. Attributes: @@ -51,7 +51,7 @@ class PaginationChunk(object): @attr.s(frozen=True, slots=True) -class RelationPaginationToken(object): +class RelationPaginationToken: """Pagination token for relation pagination API. As the results are in topological order, we can use the @@ -82,7 +82,7 @@ class RelationPaginationToken(object): @attr.s(frozen=True, slots=True) -class AggregationPaginationToken(object): +class AggregationPaginationToken: """Pagination token for relation aggregation pagination API. As the results are order by count and then MAX(stream_ordering) of the diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 96a1b59d64..8f68d968f0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -29,7 +29,7 @@ T = TypeVar("T") @attr.s(slots=True) -class StateFilter(object): +class StateFilter: """A filter used when querying for state. Attributes: @@ -326,7 +326,7 @@ class StateFilter(object): return member_filter, non_member_filter -class StateGroupStorage(object): +class StateGroupStorage: """High level interface to fetching state for event. """ diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 9f3d23f0a5..76bc3afdfa 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -25,7 +25,7 @@ from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.util.sequence import PostgresSequenceGenerator -class IdGenerator(object): +class IdGenerator: def __init__(self, db_conn, table, column): self._lock = threading.Lock() self._next_id = _load_current_id(db_conn, table, column) @@ -59,7 +59,7 @@ def _load_current_id(db_conn, table, column, step=1): return (max if step > 0 else min)(current_id, step) -class StreamIdGenerator(object): +class StreamIdGenerator: """Used to generate new stream ids when persisting events while keeping track of which transactions have been completed. diff --git a/synapse/streams/config.py b/synapse/streams/config.py index ca7c16ff65..d97dc4d101 100644 --- a/synapse/streams/config.py +++ b/synapse/streams/config.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) MAX_LIMIT = 1000 -class SourcePaginationConfig(object): +class SourcePaginationConfig: """A configuration object which stores pagination parameters for a specific event source.""" @@ -45,7 +45,7 @@ class SourcePaginationConfig(object): ) -class PaginationConfig(object): +class PaginationConfig: """A configuration object which stores pagination parameters.""" diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 7ab46f42bf..92fd5d489f 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -23,7 +23,7 @@ from synapse.handlers.typing import TypingNotificationEventSource from synapse.types import StreamToken -class EventSources(object): +class EventSources: SOURCE_TYPES = { "room": RoomEventSource, "presence": PresenceEventSource, diff --git a/synapse/types.py b/synapse/types.py index f8b9b03850..f7de48f148 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -529,7 +529,7 @@ class ThirdPartyInstanceID( @attr.s(slots=True) -class ReadReceipt(object): +class ReadReceipt: """Information about a read-receipt""" room_id = attr.ib() diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index b2a22dbd5c..3ad4b28fc7 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -46,7 +46,7 @@ def unwrapFirstError(failure): @attr.s -class Clock(object): +class Clock: """ A Clock wraps a Twisted reactor and provides utilities on top of it. diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index dfefbd996d..bb57e27beb 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -36,7 +36,7 @@ from synapse.util import Clock, unwrapFirstError logger = logging.getLogger(__name__) -class ObservableDeferred(object): +class ObservableDeferred: """Wraps a deferred object so that we can add observer deferreds. These observer deferreds do not affect the callback chain of the original deferred. @@ -188,7 +188,7 @@ def yieldable_gather_results(func, iter, *args, **kwargs): ).addErrback(unwrapFirstError) -class Linearizer(object): +class Linearizer: """Limits concurrent access to resources based on a key. Useful to ensure only a few things happen at a time on a given resource. @@ -338,7 +338,7 @@ class Linearizer(object): return new_defer -class ReadWriteLock(object): +class ReadWriteLock: """An async read write lock. Example: @@ -502,7 +502,7 @@ def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): @attr.s(slots=True, frozen=True) -class DoneAwaitable(object): +class DoneAwaitable: """Simple awaitable that returns the provided value. """ diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index dd356bf156..237f588658 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -43,7 +43,7 @@ response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["n @attr.s -class CacheMetric(object): +class CacheMetric: _cache = attr.ib() _cache_type = attr.ib(type=str) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 825810eb16..98b34f2223 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -64,7 +64,7 @@ cache_pending_metric = Gauge( _CacheSentinel = object() -class CacheEntry(object): +class CacheEntry: __slots__ = ["deferred", "callbacks", "invalidated"] def __init__(self, deferred, callbacks): @@ -80,7 +80,7 @@ class CacheEntry(object): self.callbacks.clear() -class Cache(object): +class Cache: __slots__ = ( "cache", "name", @@ -288,7 +288,7 @@ class Cache(object): self._pending_deferred_cache.clear() -class _CacheDescriptorBase(object): +class _CacheDescriptorBase: def __init__(self, orig: _CachedFunction, num_args, cache_context=False): self.orig = orig @@ -705,7 +705,7 @@ def cachedList( Example: - class Example(object): + class Example: @cached(num_args=2) def do_something(self, first_arg): ... diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 6834e6f3ae..8592b93689 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -40,7 +40,7 @@ class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "va return len(self.value) -class DictionaryCache(object): +class DictionaryCache: """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. """ @@ -53,7 +53,7 @@ class DictionaryCache(object): self.thread = None # caches_by_name[name] = self.cache - class Sentinel(object): + class Sentinel: __slots__ = [] self.sentinel = Sentinel() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 89a3420f92..e15f7ee698 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) SENTINEL = object() -class ExpiringCache(object): +class ExpiringCache: def __init__( self, cache_name, @@ -190,7 +190,7 @@ class ExpiringCache(object): return False -class _CacheEntry(object): +class _CacheEntry: __slots__ = ["time", "value"] def __init__(self, time, value): diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index df4ea5901d..4bc1a67b58 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -30,7 +30,7 @@ def enumerate_leaves(node, depth): yield m -class _Node(object): +class _Node: __slots__ = ["prev_node", "next_node", "key", "value", "callbacks"] def __init__(self, prev_node, next_node, key, value, callbacks=set()): @@ -41,7 +41,7 @@ class _Node(object): self.callbacks = callbacks -class LruCache(object): +class LruCache: """ Least-recently-used cache. Supports del_multi only if cache_type=TreeCache diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index a6c60888e5..df1a721add 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -23,7 +23,7 @@ from synapse.util.caches import register_cache logger = logging.getLogger(__name__) -class ResponseCache(object): +class ResponseCache: """ This caches a deferred response. Until the deferred completes it will be returned from the cache. This means that if the client retries the request diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index ecd9948e79..eb4d98f683 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -3,7 +3,7 @@ from typing import Dict SENTINEL = object() -class TreeCache(object): +class TreeCache: """ Tree-based backing store for LruCache. Allows subtrees of data to be deleted efficiently. @@ -89,7 +89,7 @@ def iterate_tree_cache_entry(d): yield d -class _Entry(object): +class _Entry: __slots__ = ["value"] def __init__(self, value): diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py index 6437aa907e..3e180cafd3 100644 --- a/synapse/util/caches/ttlcache.py +++ b/synapse/util/caches/ttlcache.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) SENTINEL = object() -class TTLCache(object): +class TTLCache: """A key/value cache implementation where each entry has its own TTL""" def __init__(self, cache_name, timer=time.time): @@ -154,7 +154,7 @@ class TTLCache(object): @attr.s(frozen=True, slots=True) -class _CacheEntry(object): +class _CacheEntry: """TTLCache entry""" # expiry_time is the first attribute, so that entries are sorted by expiry. diff --git a/synapse/util/distributor.py b/synapse/util/distributor.py index 22a857a306..a750261e77 100644 --- a/synapse/util/distributor.py +++ b/synapse/util/distributor.py @@ -34,7 +34,7 @@ def user_joined_room(distributor, user, room_id): distributor.fire("user_joined_room", user=user, room_id=room_id) -class Distributor(object): +class Distributor: """A central dispatch point for loosely-connected pieces of code to register, observe, and fire signals. @@ -103,7 +103,7 @@ def maybeAwaitableDeferred(f, *args, **kw): return succeed(result) -class Signal(object): +class Signal: """A Signal is a dispatch point that stores a list of callables as observers of it. diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 6a3f6177b1..733f5e26e6 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -20,7 +20,7 @@ from twisted.internet import threads from synapse.logging.context import make_deferred_yieldable, run_in_background -class BackgroundFileConsumer(object): +class BackgroundFileConsumer: """A consumer that writes to a file like object. Supports both push and pull producers diff --git a/synapse/util/jsonobject.py b/synapse/util/jsonobject.py index 6dce03dd3a..50516926f3 100644 --- a/synapse/util/jsonobject.py +++ b/synapse/util/jsonobject.py @@ -14,7 +14,7 @@ # limitations under the License. -class JsonEncodedObject(object): +class JsonEncodedObject: """ A common base class for defining protocol units that are represented as JSON. diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 13775b43f9..6e57c1ee72 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -93,7 +93,7 @@ def measure_func(name: Optional[str] = None) -> Callable[[T], T]: return wrapper -class Measure(object): +class Measure: __slots__ = [ "clock", "name", diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index e5efdfcd02..70d11e1ec3 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -29,7 +29,7 @@ from synapse.logging.context import ( logger = logging.getLogger(__name__) -class FederationRateLimiter(object): +class FederationRateLimiter: def __init__(self, clock, config): """ Args: @@ -60,7 +60,7 @@ class FederationRateLimiter(object): return self.ratelimiters[host].ratelimit() -class _PerHostRatelimiter(object): +class _PerHostRatelimiter: def __init__(self, clock, config): """ Args: diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 919988d3bc..79869aaa44 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -114,7 +114,7 @@ async def get_retry_limiter(destination, clock, store, ignore_backoff=False, **k ) -class RetryDestinationLimiter(object): +class RetryDestinationLimiter: def __init__( self, destination, diff --git a/synapse/util/wheel_timer.py b/synapse/util/wheel_timer.py index 023beb5ede..be3b22469d 100644 --- a/synapse/util/wheel_timer.py +++ b/synapse/util/wheel_timer.py @@ -14,7 +14,7 @@ # limitations under the License. -class _Entry(object): +class _Entry: __slots__ = ["end_key", "queue"] def __init__(self, end_key): @@ -22,7 +22,7 @@ class _Entry(object): self.queue = [] -class WheelTimer(object): +class WheelTimer: """Stores arbitrary objects that will be returned after their timers have expired. """ diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 5d45689c8c..8ab56ec94c 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -36,7 +36,7 @@ from tests import unittest from tests.utils import mock_getRawHeaders, setup_test_homeserver -class TestHandlers(object): +class TestHandlers: def __init__(self, hs): self.auth_handler = synapse.handlers.auth.AuthHandler(hs) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index d264653e74..2e6e7abf1f 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -43,7 +43,7 @@ from tests import unittest from tests.test_utils import make_awaitable -class MockPerspectiveServer(object): +class MockPerspectiveServer: def __init__(self): self.server_name = "mock_server" self.key = signedjson.key.generate_signing_key(0) diff --git a/tests/federation/transport/test_server.py b/tests/federation/transport/test_server.py index 27d83bb7d9..72e22d655f 100644 --- a/tests/federation/transport/test_server.py +++ b/tests/federation/transport/test_server.py @@ -26,7 +26,7 @@ from tests.unittest import override_config class RoomDirectoryFederationTests(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver): - class Authenticator(object): + class Authenticator: def authenticate_request(self, request, content): return defer.succeed("otherserver.nottld") diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 4b3fb018b1..c7efd3822d 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -28,7 +28,7 @@ from tests.test_utils import make_awaitable from tests.utils import setup_test_homeserver -class AuthHandlers(object): +class AuthHandlers: def __init__(self, hs): self.auth_handler = AuthHandler(hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 60ebc95f3e..8e95e53d9e 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -28,7 +28,7 @@ from tests.test_utils import make_awaitable from tests.utils import setup_test_homeserver -class ProfileHandlers(object): +class ProfileHandlers: def __init__(self, hs): self.profile_handler = MasterProfileHandler(hs) diff --git a/tests/http/__init__.py b/tests/http/__init__.py index 2096ba3c91..5d41443293 100644 --- a/tests/http/__init__.py +++ b/tests/http/__init__.py @@ -133,7 +133,7 @@ def create_test_cert_file(sanlist): @implementer(IOpenSSLServerConnectionCreator) -class TestServerTLSConnectionFactory(object): +class TestServerTLSConnectionFactory: """An SSL connection creator which returns connections which present a certificate signed by our test CA.""" diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index eb78ab412a..8b5ad4574f 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -1264,7 +1264,7 @@ def _log_request(request): @implementer(IPolicyForHTTPS) -class TrustingTLSPolicyForHTTPS(object): +class TrustingTLSPolicyForHTTPS: """An IPolicyForHTTPS which checks that the certificate belongs to the right server, but doesn't check the certificate chain.""" diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index 451d05c0f0..d36f5f426c 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -29,12 +29,12 @@ from synapse.logging.context import LoggingContext from tests.unittest import DEBUG, HomeserverTestCase -class FakeBeginner(object): +class FakeBeginner: def beginLoggingTo(self, observers, **kwargs): self.observers = observers -class StructuredLoggingTestBase(object): +class StructuredLoggingTestBase: """ Test base that registers a cleanup handler to reset the stdlib log handler to 'unset'. diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 227b0d32d0..3224568640 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -27,7 +27,7 @@ from tests.unittest import HomeserverTestCase @attr.s -class _User(object): +class _User: "Helper wrapper for user ID and access token" id = attr.ib() token = attr.ib() diff --git a/tests/rest/client/third_party_rules.py b/tests/rest/client/third_party_rules.py index 7167fc56b6..8c24add530 100644 --- a/tests/rest/client/third_party_rules.py +++ b/tests/rest/client/third_party_rules.py @@ -19,7 +19,7 @@ from synapse.rest.client.v1 import login, room from tests import unittest -class ThirdPartyRulesTestModule(object): +class ThirdPartyRulesTestModule: def __init__(self, config): pass diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py index e66c9a4c4c..afaf9f7b85 100644 --- a/tests/rest/client/v1/utils.py +++ b/tests/rest/client/v1/utils.py @@ -30,7 +30,7 @@ from tests.server import make_request, render @attr.s -class RestHelper(object): +class RestHelper: """Contains extra helper functions to quickly and clearly perform a given REST action, which isn't the focus of the test. """ diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 74765a582b..c00a7b9114 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -32,7 +32,7 @@ from tests.server import FakeTransport @attr.s -class FakeResponse(object): +class FakeResponse: version = attr.ib() code = attr.ib() phrase = attr.ib() @@ -43,7 +43,7 @@ class FakeResponse(object): @property def request(self): @attr.s - class FakeTransport(object): + class FakeTransport: absoluteURI = self.absoluteURI return FakeTransport() @@ -111,7 +111,7 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.lookups = {} - class Resolver(object): + class Resolver: def resolveHostName( _self, resolutionReceiver, diff --git a/tests/server.py b/tests/server.py index b6e0b14e78..48e45c6c8b 100644 --- a/tests/server.py +++ b/tests/server.py @@ -35,7 +35,7 @@ class TimedOutException(Exception): @attr.s -class FakeChannel(object): +class FakeChannel: """ A fake Twisted Web Channel (the part that interfaces with the wire). @@ -242,7 +242,7 @@ class ThreadedMemoryReactorClock(MemoryReactorClock): lookups = self.lookups = {} @implementer(IResolverSimple) - class FakeResolver(object): + class FakeResolver: def getHostByName(self, name, timeout=None): if name not in lookups: return fail(DNSLookupError("OH NO: unknown %s" % (name,))) @@ -371,7 +371,7 @@ def get_clock(): @attr.s(cmp=False) -class FakeTransport(object): +class FakeTransport: """ A twisted.internet.interfaces.ITransport implementation which sends all its data straight into an IProtocol object: it exists to connect two IProtocols together. diff --git a/tests/state/test_v2.py b/tests/state/test_v2.py index f2955a9c69..ad9bbef9d2 100644 --- a/tests/state/test_v2.py +++ b/tests/state/test_v2.py @@ -49,7 +49,7 @@ class FakeClock: return defer.succeed(None) -class FakeEvent(object): +class FakeEvent: """A fake event we use as a convenience. NOTE: Again as a convenience we use "node_ids" rather than event_ids to @@ -595,7 +595,7 @@ def pairwise(iterable): @attr.s -class TestStateResolutionStore(object): +class TestStateResolutionStore: event_map = attr.ib() def get_events(self, event_ids, allow_rejected=False): diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 319e2c2325..f5afed017c 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -99,7 +99,7 @@ class CacheTestCase(unittest.HomeserverTestCase): class CacheDecoratorTestCase(unittest.HomeserverTestCase): @defer.inlineCallbacks def test_passthrough(self): - class A(object): + class A: @cached() def func(self, key): return key @@ -113,7 +113,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): def test_hit(self): callcount = [0] - class A(object): + class A: @cached() def func(self, key): callcount[0] += 1 @@ -131,7 +131,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): def test_invalidate(self): callcount = [0] - class A(object): + class A: @cached() def func(self, key): callcount[0] += 1 @@ -149,7 +149,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): self.assertEquals(callcount[0], 2) def test_invalidate_missing(self): - class A(object): + class A: @cached() def func(self, key): return key @@ -160,7 +160,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): def test_max_entries(self): callcount = [0] - class A(object): + class A: @cached(max_entries=10) def func(self, key): callcount[0] += 1 @@ -187,7 +187,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): d = defer.succeed(123) - class A(object): + class A: @cached() def func(self, key): callcount[0] += 1 @@ -205,7 +205,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): callcount = [0] callcount2 = [0] - class A(object): + class A: @cached() def func(self, key): callcount[0] += 1 @@ -238,7 +238,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): callcount = [0] callcount2 = [0] - class A(object): + class A: @cached(max_entries=2) def func(self, key): callcount[0] += 1 @@ -275,7 +275,7 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): callcount = [0] callcount2 = [0] - class A(object): + class A: @cached() def func(self, key): callcount[0] += 1 diff --git a/tests/test_state.py b/tests/test_state.py index 56ba0fecf5..2d58467932 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -71,7 +71,7 @@ def create_event( return event -class StateGroupStore(object): +class StateGroupStore: def __init__(self): self._event_to_state_group = {} self._group_to_state = {} @@ -129,7 +129,7 @@ class DictObj(dict): self.__dict__ = self -class Graph(object): +class Graph: def __init__(self, nodes, edges): events = {} clobbered = set(events.keys()) diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 4a4483ba12..510b630114 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -294,7 +294,7 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): test_large_room.skip = "Disabled by default because it's slow" -class _TestStore(object): +class _TestStore: """Implements a few methods of the DataStore, so that we can test filter_events_for_server diff --git a/tests/unittest.py b/tests/unittest.py index 7b80999a74..3cb55a7e96 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -614,7 +614,7 @@ class FederatingHomeserverTestCase(HomeserverTestCase): """ def prepare(self, reactor, clock, homeserver): - class Authenticator(object): + class Authenticator: def authenticate_request(self, request, content): return succeed("other.example.com") diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py index 0363735d4f..677e925477 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py @@ -88,7 +88,7 @@ class CacheTestCase(unittest.TestCase): class DescriptorTestCase(unittest.TestCase): @defer.inlineCallbacks def test_cache(self): - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() @@ -122,7 +122,7 @@ class DescriptorTestCase(unittest.TestCase): def test_cache_num_args(self): """Only the first num_args arguments should matter to the cache""" - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() @@ -156,7 +156,7 @@ class DescriptorTestCase(unittest.TestCase): """If the wrapped function throws synchronously, things should continue to work """ - class Cls(object): + class Cls: @cached() def fn(self, arg1): raise SynapseError(100, "mai spoon iz too big!!1") @@ -180,7 +180,7 @@ class DescriptorTestCase(unittest.TestCase): complete_lookup = defer.Deferred() - class Cls(object): + class Cls: @descriptors.cached() def fn(self, arg1): @defer.inlineCallbacks @@ -223,7 +223,7 @@ class DescriptorTestCase(unittest.TestCase): """Check that the cache sets and restores logcontexts correctly when the lookup function throws an exception""" - class Cls(object): + class Cls: @descriptors.cached() def fn(self, arg1): @defer.inlineCallbacks @@ -263,7 +263,7 @@ class DescriptorTestCase(unittest.TestCase): @defer.inlineCallbacks def test_cache_default_args(self): - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() @@ -300,7 +300,7 @@ class DescriptorTestCase(unittest.TestCase): obj.mock.assert_not_called() def test_cache_iterable(self): - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() @@ -336,7 +336,7 @@ class DescriptorTestCase(unittest.TestCase): """If the wrapped function throws synchronously, things should continue to work """ - class Cls(object): + class Cls: @descriptors.cached(iterable=True) def fn(self, arg1): raise SynapseError(100, "mai spoon iz too big!!1") @@ -358,7 +358,7 @@ class DescriptorTestCase(unittest.TestCase): class CachedListDescriptorTestCase(unittest.TestCase): @defer.inlineCallbacks def test_cache(self): - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() @@ -408,7 +408,7 @@ class CachedListDescriptorTestCase(unittest.TestCase): def test_invalidate(self): """Make sure that invalidation callbacks are called.""" - class Cls(object): + class Cls: def __init__(self): self.mock = mock.Mock() diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py index 8d6627ec33..2012263184 100644 --- a/tests/util/test_file_consumer.py +++ b/tests/util/test_file_consumer.py @@ -112,7 +112,7 @@ class FileConsumerTests(unittest.TestCase): self.assertTrue(string_file.closed) -class DummyPullProducer(object): +class DummyPullProducer: def __init__(self): self.consumer = None self.deferred = defer.Deferred() @@ -134,7 +134,7 @@ class DummyPullProducer(object): return d -class BlockingStringWrite(object): +class BlockingStringWrite: def __init__(self): self.buffer = "" self.closed = False diff --git a/tests/utils.py b/tests/utils.py index a61cbdef44..4673872f88 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -472,7 +472,7 @@ class MockHttpResource(HttpServer): self.callbacks.append((method, path_pattern, callback)) -class MockKey(object): +class MockKey: alg = "mock_alg" version = "mock_version" signature = b"\x9a\x87$" @@ -491,7 +491,7 @@ class MockKey(object): return b"" -class MockClock(object): +class MockClock: now = 1000 def __init__(self): @@ -568,7 +568,7 @@ def _format_call(args, kwargs): ) -class DeferredMockCallable(object): +class DeferredMockCallable: """A callable instance that stores a set of pending call expectations and return values for them. It allows a unit test to assert that the given set of function calls are eventually made, by awaiting on them to be called. -- cgit 1.5.1 From 0dae7d80bfb497d417a53e52b8353bbcd6d10d58 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 7 Sep 2020 13:36:02 +0100 Subject: Add more logging to debug slow startup (#8264) I'm hoping this will provide some pointers for debugging https://github.com/matrix-org/synapse/issues/7968. --- changelog.d/8264.misc | 1 + synapse/storage/databases/__init__.py | 23 ++++++++++++++++------- synapse/storage/databases/main/__init__.py | 1 + synapse/storage/prepare_database.py | 10 ++++++++++ synapse/storage/util/id_generators.py | 5 +++++ 5 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 changelog.d/8264.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8264.misc b/changelog.d/8264.misc new file mode 100644 index 0000000000..600b175655 --- /dev/null +++ b/changelog.d/8264.misc @@ -0,0 +1 @@ +Add more logging to debug slow startup. diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py index 7f08bd8285..985b12df91 100644 --- a/synapse/storage/databases/__init__.py +++ b/synapse/storage/databases/__init__.py @@ -47,9 +47,14 @@ class Databases: engine = create_engine(database_config.config) with make_conn(database_config, engine) as db_conn: - logger.info("Preparing database %r...", db_name) - + logger.info("[database config %r]: Checking database server", db_name) engine.check_database(db_conn) + + logger.info( + "[database config %r]: Preparing for databases %r", + db_name, + database_config.databases, + ) prepare_database( db_conn, engine, hs.config, databases=database_config.databases, ) @@ -57,7 +62,9 @@ class Databases: database = DatabasePool(hs, database_config, engine) if "main" in database_config.databases: - logger.info("Starting 'main' data store") + logger.info( + "[database config %r]: Starting 'main' database", db_name + ) # Sanity check we don't try and configure the main store on # multiple databases. @@ -72,7 +79,9 @@ class Databases: persist_events = PersistEventsStore(hs, database, main) if "state" in database_config.databases: - logger.info("Starting 'state' data store") + logger.info( + "[database config %r]: Starting 'state' database", db_name + ) # Sanity check we don't try and configure the state store on # multiple databases. @@ -85,7 +94,7 @@ class Databases: self.databases.append(database) - logger.info("Database %r prepared", db_name) + logger.info("[database config %r]: prepared", db_name) # Closing the context manager doesn't close the connection. # psycopg will close the connection when the object gets GCed, but *only* @@ -98,10 +107,10 @@ class Databases: # Sanity check that we have actually configured all the required stores. if not main: - raise Exception("No 'main' data store configured") + raise Exception("No 'main' database configured") if not state: - raise Exception("No 'state' data store configured") + raise Exception("No 'state' database configured") # We use local variables here to ensure that the databases do not have # optional types. diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 99890ffbf3..b315ae66d2 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -591,6 +591,7 @@ def check_database_before_upgrade(cur, database_engine, config: HomeServerConfig """Called before upgrading an existing database to check that it is broadly sane compared with the configuration. """ + logger.info("Running sanity-checks on database...") domain = config.server_name sql = database_engine.convert_param_style( diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 93dca96476..ee60e2a718 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -82,10 +82,18 @@ def prepare_database(db_conn, database_engine, config, databases=["main", "state try: cur = db_conn.cursor() + + logger.info("%r: Checking existing schema version", databases) version_info = _get_or_create_schema_state(cur, database_engine) if version_info: user_version, delta_files, upgraded = version_info + logger.info( + "%r: Existing schema is %i (+%i deltas)", + databases, + user_version, + len(delta_files), + ) # config should only be None when we are preparing an in-memory SQLite db, # which should be empty. @@ -111,6 +119,8 @@ def prepare_database(db_conn, database_engine, config, databases=["main", "state databases=databases, ) else: + logger.info("%r: Initialising new database", databases) + # if it's a worker app, refuse to upgrade the database, to avoid multiple # workers doing it at once. if config and config.worker_app is not None: diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 76bc3afdfa..b7eb4f8ac9 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -15,6 +15,7 @@ import contextlib import heapq +import logging import threading from collections import deque from typing import Dict, List, Set @@ -24,6 +25,8 @@ from typing_extensions import Deque from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.util.sequence import PostgresSequenceGenerator +logger = logging.getLogger(__name__) + class IdGenerator: def __init__(self, db_conn, table, column): @@ -48,6 +51,8 @@ def _load_current_id(db_conn, table, column, step=1): Returns: int """ + # debug logging for https://github.com/matrix-org/synapse/issues/7968 + logger.info("initialising stream generator for %s(%s)", table, column) cur = db_conn.cursor() if step == 1: cur.execute("SELECT MAX(%s) FROM %s" % (column, table)) -- cgit 1.5.1 From deedb917325ea9ce8085df45dd925b8d583fd661 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Sep 2020 14:26:54 +0100 Subject: Fix `MultiWriterIdGenerator.current_position`. (#8257) It did not correctly handle IDs finishing being persisted out of order, resulting in the `current_position` lagging until new IDs are persisted. --- changelog.d/8257.misc | 1 + synapse/storage/util/id_generators.py | 43 +++++++++++++++++++++++++----- tests/storage/test_id_generators.py | 50 +++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 changelog.d/8257.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8257.misc b/changelog.d/8257.misc new file mode 100644 index 0000000000..47ac583eb4 --- /dev/null +++ b/changelog.d/8257.misc @@ -0,0 +1 @@ +Fix non-user visible bug in implementation of `MultiWriterIdGenerator.get_current_token_for_writer`. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index b7eb4f8ac9..2a66b3ad4e 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -224,6 +224,10 @@ class MultiWriterIdGenerator: # should be less than the minimum of this set (if not empty). self._unfinished_ids = set() # type: Set[int] + # Set of local IDs that we've processed that are larger than the current + # position, due to there being smaller unpersisted IDs. + self._finished_ids = set() # type: Set[int] + # We track the max position where we know everything before has been # persisted. This is done by a) looking at the min across all instances # and b) noting that if we have seen a run of persisted positions @@ -348,17 +352,44 @@ class MultiWriterIdGenerator: def _mark_id_as_finished(self, next_id: int): """The ID has finished being processed so we should advance the - current poistion if possible. + current position if possible. """ with self._lock: self._unfinished_ids.discard(next_id) + self._finished_ids.add(next_id) + + new_cur = None + + if self._unfinished_ids: + # If there are unfinished IDs then the new position will be the + # largest finished ID less than the minimum unfinished ID. + + finished = set() + + min_unfinshed = min(self._unfinished_ids) + for s in self._finished_ids: + if s < min_unfinshed: + if new_cur is None or new_cur < s: + new_cur = s + else: + finished.add(s) + + # We clear these out since they're now all less than the new + # position. + self._finished_ids = finished + else: + # There are no unfinished IDs so the new position is simply the + # largest finished one. + new_cur = max(self._finished_ids) + + # We clear these out since they're now all less than the new + # position. + self._finished_ids.clear() - # Figure out if its safe to advance the position by checking there - # aren't any lower allocated IDs that are yet to finish. - if all(c > next_id for c in self._unfinished_ids): + if new_cur: curr = self._current_positions.get(self._instance_name, 0) - self._current_positions[self._instance_name] = max(curr, next_id) + self._current_positions[self._instance_name] = max(curr, new_cur) self._add_persisted_position(next_id) @@ -428,7 +459,7 @@ class MultiWriterIdGenerator: # We move the current min position up if the minimum current positions # of all instances is higher (since by definition all positions less # that that have been persisted). - min_curr = min(self._current_positions.values()) + min_curr = min(self._current_positions.values(), default=0) self._persisted_upto_position = max(min_curr, self._persisted_upto_position) # We now iterate through the seen positions, discarding those that are diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index f0a8e32f1e..20636fc400 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -122,6 +122,56 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(id_gen.get_positions(), {"master": 8}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) + def test_out_of_order_finish(self): + """Test that IDs persisted out of order are correctly handled + """ + + # Prefill table with 7 rows written by 'master' + self._insert_rows("master", 7) + + id_gen = self._create_id_generator() + + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + + ctx1 = self.get_success(id_gen.get_next()) + ctx2 = self.get_success(id_gen.get_next()) + ctx3 = self.get_success(id_gen.get_next()) + ctx4 = self.get_success(id_gen.get_next()) + + s1 = ctx1.__enter__() + s2 = ctx2.__enter__() + s3 = ctx3.__enter__() + s4 = ctx4.__enter__() + + self.assertEqual(s1, 8) + self.assertEqual(s2, 9) + self.assertEqual(s3, 10) + self.assertEqual(s4, 11) + + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + + ctx2.__exit__(None, None, None) + + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + + ctx1.__exit__(None, None, None) + + self.assertEqual(id_gen.get_positions(), {"master": 9}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 9) + + ctx4.__exit__(None, None, None) + + self.assertEqual(id_gen.get_positions(), {"master": 9}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 9) + + ctx3.__exit__(None, None, None) + + self.assertEqual(id_gen.get_positions(), {"master": 11}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 11) + def test_multi_instance(self): """Test that reads and writes from multiple processes are handled correctly. -- cgit 1.5.1 From 04cc249b43e8716513f788b2a4eeb8ede24d19df Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 14 Sep 2020 10:16:41 +0100 Subject: Add experimental support for sharding event persister. Again. (#8294) This is *not* ready for production yet. Caveats: 1. We should write some tests... 2. The stream token that we use for events can get stalled at the minimum position of all writers. This means that new events may not be processed and e.g. sent down sync streams if a writer isn't writing or is slow. --- changelog.d/8294.feature | 1 + synapse/config/_base.py | 21 ++++++- synapse/config/_base.pyi | 1 + synapse/config/workers.py | 37 ++++++++---- synapse/handlers/federation.py | 44 ++++++++++----- synapse/handlers/message.py | 14 +++-- synapse/handlers/room.py | 14 +++-- synapse/handlers/room_member.py | 7 --- synapse/replication/http/federation.py | 12 +++- synapse/replication/tcp/handler.py | 2 +- synapse/replication/tcp/streams/events.py | 4 +- synapse/storage/databases/__init__.py | 2 +- synapse/storage/databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 12 ++-- synapse/storage/databases/main/events_worker.py | 66 +++++++++++++++------- .../schema/delta/58/14events_instance_name.sql | 16 ++++++ .../delta/58/14events_instance_name.sql.postgres | 26 +++++++++ synapse/storage/util/id_generators.py | 10 ++-- 18 files changed, 211 insertions(+), 80 deletions(-) create mode 100644 changelog.d/8294.feature create mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql create mode 100644 synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8294.feature b/changelog.d/8294.feature new file mode 100644 index 0000000000..b363e929ea --- /dev/null +++ b/changelog.d/8294.feature @@ -0,0 +1 @@ +Add experimental support for sharding event persister. diff --git a/synapse/config/_base.py b/synapse/config/_base.py index ad5ab6ad62..bb9bf8598d 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -832,11 +832,26 @@ class ShardedWorkerHandlingConfig: def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key. """ - - # If multiple instances are not defined we always return true. + # If multiple instances are not defined we always return true if not self.instances or len(self.instances) == 1: return True + return self.get_instance(key) == instance_name + + def get_instance(self, key: str) -> str: + """Get the instance responsible for handling the given key. + + Note: For things like federation sending the config for which instance + is sending is known only to the sender instance if there is only one. + Therefore `should_handle` should be used where possible. + """ + + if not self.instances: + return "master" + + if len(self.instances) == 1: + return self.instances[0] + # We shard by taking the hash, modulo it by the number of instances and # then checking whether this instance matches the instance at that # index. @@ -846,7 +861,7 @@ class ShardedWorkerHandlingConfig: dest_hash = sha256(key.encode("utf8")).digest() dest_int = int.from_bytes(dest_hash, byteorder="little") remainder = dest_int % (len(self.instances)) - return self.instances[remainder] == instance_name + return self.instances[remainder] __all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index eb911e8f9f..b8faafa9bd 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -142,3 +142,4 @@ class ShardedWorkerHandlingConfig: instances: List[str] def __init__(self, instances: List[str]) -> None: ... def should_handle(self, instance_name: str, key: str) -> bool: ... + def get_instance(self, key: str) -> str: ... diff --git a/synapse/config/workers.py b/synapse/config/workers.py index c784a71508..f23e42cdf9 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -13,12 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Union + import attr from ._base import Config, ConfigError, ShardedWorkerHandlingConfig from .server import ListenerConfig, parse_listener_def +def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: + """Helper for allowing parsing a string or list of strings to a config + option expecting a list of strings. + """ + + if isinstance(obj, str): + return [obj] + return obj + + @attr.s class InstanceLocationConfig: """The host and port to talk to an instance via HTTP replication. @@ -33,11 +45,13 @@ class WriterLocations: """Specifies the instances that write various streams. Attributes: - events: The instance that writes to the event and backfill streams. - events: The instance that writes to the typing stream. + events: The instances that write to the event and backfill streams. + typing: The instance that writes to the typing stream. """ - events = attr.ib(default="master", type=str) + events = attr.ib( + default=["master"], type=List[str], converter=_instance_to_list_converter + ) typing = attr.ib(default="master", type=str) @@ -105,15 +119,18 @@ class WorkerConfig(Config): writers = config.get("stream_writers") or {} self.writers = WriterLocations(**writers) - # Check that the configured writer for events and typing also appears in + # Check that the configured writers for events and typing also appears in # `instance_map`. for stream in ("events", "typing"): - instance = getattr(self.writers, stream) - if instance != "master" and instance not in self.instance_map: - raise ConfigError( - "Instance %r is configured to write %s but does not appear in `instance_map` config." - % (instance, stream) - ) + instances = _instance_to_list_converter(getattr(self.writers, stream)) + for instance in instances: + if instance != "master" and instance not in self.instance_map: + raise ConfigError( + "Instance %r is configured to write %s but does not appear in `instance_map` config." + % (instance, stream) + ) + + self.events_shard_config = ShardedWorkerHandlingConfig(self.writers.events) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """\ diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c195eba830..a5734bebab 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -896,7 +896,8 @@ class FederationHandler(BaseHandler): ) ) - await self._handle_new_events(dest, ev_infos, backfilled=True) + if ev_infos: + await self._handle_new_events(dest, room_id, ev_infos, backfilled=True) # Step 2: Persist the rest of the events in the chunk one by one events.sort(key=lambda e: e.depth) @@ -1189,7 +1190,7 @@ class FederationHandler(BaseHandler): event_infos.append(_NewEventInfo(event, None, auth)) await self._handle_new_events( - destination, event_infos, + destination, room_id, event_infos, ) def _sanity_check_event(self, ev): @@ -1336,15 +1337,15 @@ class FederationHandler(BaseHandler): ) max_stream_id = await self._persist_auth_tree( - origin, auth_chain, state, event, room_version_obj + origin, room_id, auth_chain, state, event, room_version_obj ) # We wait here until this instance has seen the events come down # replication (if we're using replication) as the below uses caches. - # - # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.config.worker.writers.events, "events", max_stream_id + self.config.worker.events_shard_config.get_instance(room_id), + "events", + max_stream_id, ) # Check whether this room is the result of an upgrade of a room we already know @@ -1593,7 +1594,7 @@ class FederationHandler(BaseHandler): ) context = await self.state_handler.compute_event_context(event) - await self.persist_events_and_notify([(event, context)]) + await self.persist_events_and_notify(event.room_id, [(event, context)]) return event @@ -1620,7 +1621,9 @@ class FederationHandler(BaseHandler): await self.federation_client.send_leave(host_list, event) context = await self.state_handler.compute_event_context(event) - stream_id = await self.persist_events_and_notify([(event, context)]) + stream_id = await self.persist_events_and_notify( + event.room_id, [(event, context)] + ) return event, stream_id @@ -1868,7 +1871,7 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( - [(event, context)], backfilled=backfilled + event.room_id, [(event, context)], backfilled=backfilled ) except Exception: run_in_background( @@ -1881,6 +1884,7 @@ class FederationHandler(BaseHandler): async def _handle_new_events( self, origin: str, + room_id: str, event_infos: Iterable[_NewEventInfo], backfilled: bool = False, ) -> None: @@ -1912,6 +1916,7 @@ class FederationHandler(BaseHandler): ) await self.persist_events_and_notify( + room_id, [ (ev_info.event, context) for ev_info, context in zip(event_infos, contexts) @@ -1922,6 +1927,7 @@ class FederationHandler(BaseHandler): async def _persist_auth_tree( self, origin: str, + room_id: str, auth_events: List[EventBase], state: List[EventBase], event: EventBase, @@ -1936,6 +1942,7 @@ class FederationHandler(BaseHandler): Args: origin: Where the events came from + room_id, auth_events state event @@ -2010,17 +2017,20 @@ class FederationHandler(BaseHandler): events_to_context[e.event_id].rejected = RejectedReason.AUTH_ERROR await self.persist_events_and_notify( + room_id, [ (e, events_to_context[e.event_id]) for e in itertools.chain(auth_events, state) - ] + ], ) new_event_context = await self.state_handler.compute_event_context( event, old_state=state ) - return await self.persist_events_and_notify([(event, new_event_context)]) + return await self.persist_events_and_notify( + room_id, [(event, new_event_context)] + ) async def _prep_event( self, @@ -2871,6 +2881,7 @@ class FederationHandler(BaseHandler): async def persist_events_and_notify( self, + room_id: str, event_and_contexts: Sequence[Tuple[EventBase, EventContext]], backfilled: bool = False, ) -> int: @@ -2878,14 +2889,19 @@ class FederationHandler(BaseHandler): necessary. Args: - event_and_contexts: + room_id: The room ID of events being persisted. + event_and_contexts: Sequence of events with their associated + context that should be persisted. All events must belong to + the same room. backfilled: Whether these events are a result of backfilling or not """ - if self.config.worker.writers.events != self._instance_name: + instance = self.config.worker.events_shard_config.get_instance(room_id) + if instance != self._instance_name: result = await self._send_events( - instance_name=self.config.worker.writers.events, + instance_name=instance, store=self.store, + room_id=room_id, event_and_contexts=event_and_contexts, backfilled=backfilled, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e54e2b322b..a8fe5cf4e2 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -376,9 +376,8 @@ class EventCreationHandler: self.notifier = hs.get_notifier() self.config = hs.config self.require_membership_for_aliases = hs.config.require_membership_for_aliases - self._is_event_writer = ( - self.config.worker.writers.events == hs.get_instance_name() - ) + self._events_shard_config = self.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() self.room_invite_state_types = self.hs.config.room_invite_state_types @@ -902,9 +901,10 @@ class EventCreationHandler: try: # If we're a worker we need to hit out to the master. - if not self._is_event_writer: + writer_instance = self._events_shard_config.get_instance(event.room_id) + if writer_instance != self._instance_name: result = await self.send_event( - instance_name=self.config.worker.writers.events, + instance_name=writer_instance, event_id=event.event_id, store=self.store, requester=requester, @@ -972,8 +972,10 @@ class EventCreationHandler: This should only be run on the instance in charge of persisting events. """ - assert self._is_event_writer assert self.storage.persistence is not None + assert self._events_shard_config.should_handle( + self._instance_name, event.room_id + ) if ratelimit: # We check if this is a room admin redacting an event so that we diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 53d85ab97d..eeade6ad3f 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -804,7 +804,9 @@ class RoomCreationHandler(BaseHandler): # Always wait for room creation to progate before returning await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", last_stream_id + self.hs.config.worker.events_shard_config.get_instance(room_id), + "events", + last_stream_id, ) return result, last_stream_id @@ -1259,10 +1261,10 @@ class RoomShutdownHandler: # We now wait for the create room to come back in via replication so # that we can assume that all the joins/invites have propogated before # we try and auto join below. - # - # TODO: Currently the events stream is written to from master await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", stream_id + self.hs.config.worker.events_shard_config.get_instance(new_room_id), + "events", + stream_id, ) else: new_room_id = None @@ -1292,7 +1294,9 @@ class RoomShutdownHandler: # Wait for leave to come in over replication before trying to forget. await self._replication.wait_for_stream_position( - self.hs.config.worker.writers.events, "events", stream_id + self.hs.config.worker.events_shard_config.get_instance(room_id), + "events", + stream_id, ) await self.room_member_handler.forget(target_requester.user, room_id) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 100f335b80..01a6e88262 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -82,13 +82,6 @@ class RoomMemberHandler: self._enable_lookup = hs.config.enable_3pid_lookup self.allow_per_room_profiles = self.config.allow_per_room_profiles - self._event_stream_writer_instance = hs.config.worker.writers.events - self._is_on_event_persistence_instance = ( - self._event_stream_writer_instance == hs.get_instance_name() - ) - if self._is_on_event_persistence_instance: - self.persist_event_storage = hs.get_storage().persistence - self._join_rate_limiter_local = Ratelimiter( clock=self.clock, rate_hz=hs.config.ratelimiting.rc_joins_local.per_second, diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 6b56315148..5c8be747e1 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -65,10 +65,11 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): self.federation_handler = hs.get_handlers().federation_handler @staticmethod - async def _serialize_payload(store, event_and_contexts, backfilled): + async def _serialize_payload(store, room_id, event_and_contexts, backfilled): """ Args: store + room_id (str) event_and_contexts (list[tuple[FrozenEvent, EventContext]]) backfilled (bool): Whether or not the events are the result of backfilling @@ -88,7 +89,11 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): } ) - payload = {"events": event_payloads, "backfilled": backfilled} + payload = { + "events": event_payloads, + "backfilled": backfilled, + "room_id": room_id, + } return payload @@ -96,6 +101,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): with Measure(self.clock, "repl_fed_send_events_parse"): content = parse_json_object_from_request(request) + room_id = content["room_id"] backfilled = content["backfilled"] event_payloads = content["events"] @@ -120,7 +126,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): logger.info("Got %d events from federation", len(event_and_contexts)) max_stream_id = await self.federation_handler.persist_events_and_notify( - event_and_contexts, backfilled + room_id, event_and_contexts, backfilled ) return 200, {"max_stream_id": max_stream_id} diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 1c303f3a46..b323841f73 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -109,7 +109,7 @@ class ReplicationCommandHandler: if isinstance(stream, (EventsStream, BackfillStream)): # Only add EventStream and BackfillStream as a source on the # instance in charge of event persistence. - if hs.config.worker.writers.events == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.events: self._streams_to_replicate.append(stream) continue diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index f929fc3954..ccc7ca30d8 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -19,7 +19,7 @@ from typing import List, Tuple, Type import attr -from ._base import Stream, StreamUpdateResult, Token, current_token_without_instance +from ._base import Stream, StreamUpdateResult, Token """Handling of the 'events' replication stream @@ -117,7 +117,7 @@ class EventsStream(Stream): self._store = hs.get_datastore() super().__init__( hs.get_instance_name(), - current_token_without_instance(self._store.get_current_events_token), + self._store._stream_id_gen.get_current_token_for_writer, self._update_function, ) diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py index 985b12df91..aa5d490624 100644 --- a/synapse/storage/databases/__init__.py +++ b/synapse/storage/databases/__init__.py @@ -75,7 +75,7 @@ class Databases: # If we're on a process that can persist events also # instantiate a `PersistEventsStore` - if hs.config.worker.writers.events == hs.get_instance_name(): + if hs.get_instance_name() in hs.config.worker.writers.events: persist_events = PersistEventsStore(hs, database, main) if "state" in database_config.databases: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0b69aa6a94..4c3c162acf 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -438,7 +438,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ if stream_ordering <= self.stream_ordering_month_ago: - raise StoreError(400, "stream_ordering too old") + raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,)) sql = """ SELECT event_id FROM stream_ordering_to_exterm diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9cd1403b38..9a80f419e3 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -32,7 +32,7 @@ from synapse.logging.utils import log_function from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.search import SearchEntry -from synapse.storage.util.id_generators import StreamIdGenerator +from synapse.storage.util.id_generators import MultiWriterIdGenerator from synapse.types import StateMap, get_domain_from_id from synapse.util.frozenutils import frozendict_json_encoder from synapse.util.iterutils import batch_iter @@ -97,18 +97,21 @@ class PersistEventsStore: self.store = main_data_store self.database_engine = db.engine self._clock = hs.get_clock() + self._instance_name = hs.get_instance_name() self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id # Ideally we'd move these ID gens here, unfortunately some other ID # generators are chained off them so doing so is a bit of a PITA. - self._backfill_id_gen = self.store._backfill_id_gen # type: StreamIdGenerator - self._stream_id_gen = self.store._stream_id_gen # type: StreamIdGenerator + self._backfill_id_gen = ( + self.store._backfill_id_gen + ) # type: MultiWriterIdGenerator + self._stream_id_gen = self.store._stream_id_gen # type: MultiWriterIdGenerator # This should only exist on instances that are configured to write assert ( - hs.config.worker.writers.events == hs.get_instance_name() + hs.get_instance_name() in hs.config.worker.writers.events ), "Can only instantiate EventsStore on master" async def _persist_events_and_state_updates( @@ -809,6 +812,7 @@ class PersistEventsStore: table="events", values=[ { + "instance_name": self._instance_name, "stream_ordering": event.internal_metadata.stream_ordering, "topological_ordering": event.depth, "depth": event.depth, diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index a7a73cc3d8..17f5997b89 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -42,7 +42,8 @@ from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool -from synapse.storage.util.id_generators import StreamIdGenerator +from synapse.storage.engines import PostgresEngine +from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.types import Collection, get_domain_from_id from synapse.util.caches.descriptors import Cache, cached from synapse.util.iterutils import batch_iter @@ -78,27 +79,54 @@ class EventsWorkerStore(SQLBaseStore): def __init__(self, database: DatabasePool, db_conn, hs): super(EventsWorkerStore, self).__init__(database, db_conn, hs) - if hs.config.worker.writers.events == hs.get_instance_name(): - # We are the process in charge of generating stream ids for events, - # so instantiate ID generators based on the database - self._stream_id_gen = StreamIdGenerator( - db_conn, "events", "stream_ordering", + if isinstance(database.engine, PostgresEngine): + # If we're using Postgres than we can use `MultiWriterIdGenerator` + # regardless of whether this process writes to the streams or not. + self._stream_id_gen = MultiWriterIdGenerator( + db_conn=db_conn, + db=database, + instance_name=hs.get_instance_name(), + table="events", + instance_column="instance_name", + id_column="stream_ordering", + sequence_name="events_stream_seq", ) - self._backfill_id_gen = StreamIdGenerator( - db_conn, - "events", - "stream_ordering", - step=-1, - extra_tables=[("ex_outlier_stream", "event_stream_ordering")], + self._backfill_id_gen = MultiWriterIdGenerator( + db_conn=db_conn, + db=database, + instance_name=hs.get_instance_name(), + table="events", + instance_column="instance_name", + id_column="stream_ordering", + sequence_name="events_backfill_stream_seq", + positive=False, ) else: - # Another process is in charge of persisting events and generating - # stream IDs: rely on the replication streams to let us know which - # IDs we can process. - self._stream_id_gen = SlavedIdTracker(db_conn, "events", "stream_ordering") - self._backfill_id_gen = SlavedIdTracker( - db_conn, "events", "stream_ordering", step=-1 - ) + # We shouldn't be running in worker mode with SQLite, but its useful + # to support it for unit tests. + # + # If this process is the writer than we need to use + # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets + # updated over replication. (Multiple writers are not supported for + # SQLite). + if hs.get_instance_name() in hs.config.worker.writers.events: + self._stream_id_gen = StreamIdGenerator( + db_conn, "events", "stream_ordering", + ) + self._backfill_id_gen = StreamIdGenerator( + db_conn, + "events", + "stream_ordering", + step=-1, + extra_tables=[("ex_outlier_stream", "event_stream_ordering")], + ) + else: + self._stream_id_gen = SlavedIdTracker( + db_conn, "events", "stream_ordering" + ) + self._backfill_id_gen = SlavedIdTracker( + db_conn, "events", "stream_ordering", step=-1 + ) self._get_event_cache = Cache( "*getEvent*", diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql new file mode 100644 index 0000000000..98ff76d709 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql @@ -0,0 +1,16 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE events ADD COLUMN instance_name TEXT; diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres new file mode 100644 index 0000000000..97c1e6a0c5 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres @@ -0,0 +1,26 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SEQUENCE IF NOT EXISTS events_stream_seq; + +SELECT setval('events_stream_seq', ( + SELECT COALESCE(MAX(stream_ordering), 1) FROM events +)); + +CREATE SEQUENCE IF NOT EXISTS events_backfill_stream_seq; + +SELECT setval('events_backfill_stream_seq', ( + SELECT COALESCE(-MIN(stream_ordering), 1) FROM events +)); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 2a66b3ad4e..1de2b91587 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -240,8 +240,12 @@ class MultiWriterIdGenerator: # gaps should be relatively rare it's still worth doing the book keeping # that allows us to skip forwards when there are gapless runs of # positions. + # + # We start at 1 here as a) the first generated stream ID will be 2, and + # b) other parts of the code assume that stream IDs are strictly greater + # than 0. self._persisted_upto_position = ( - min(self._current_positions.values()) if self._current_positions else 0 + min(self._current_positions.values()) if self._current_positions else 1 ) self._known_persisted_positions = [] # type: List[int] @@ -398,9 +402,7 @@ class MultiWriterIdGenerator: equal to it have been successfully persisted. """ - # Currently we don't support this operation, as it's not obvious how to - # condense the stream positions of multiple writers into a single int. - raise NotImplementedError() + return self.get_persisted_upto_position() def get_current_token_for_writer(self, instance_name: str) -> int: """Returns the position of the given writer. -- cgit 1.5.1 From cbabb312e0b59090e5a8cf9e7e016a8618e62867 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Sep 2020 16:11:18 +0100 Subject: Use `async with` for ID gens (#8383) This will allow us to hit the DB after we've finished using the generated stream ID. --- changelog.d/8383.misc | 1 + synapse/storage/databases/main/account_data.py | 4 +- synapse/storage/databases/main/deviceinbox.py | 4 +- synapse/storage/databases/main/devices.py | 6 +- synapse/storage/databases/main/end_to_end_keys.py | 2 +- synapse/storage/databases/main/events.py | 6 +- synapse/storage/databases/main/group_server.py | 2 +- synapse/storage/databases/main/presence.py | 4 +- synapse/storage/databases/main/push_rule.py | 8 +- synapse/storage/databases/main/pusher.py | 4 +- synapse/storage/databases/main/receipts.py | 2 +- synapse/storage/databases/main/room.py | 6 +- synapse/storage/databases/main/tags.py | 4 +- synapse/storage/util/id_generators.py | 130 +++++++++++++--------- tests/storage/test_id_generators.py | 66 ++++++----- 15 files changed, 144 insertions(+), 105 deletions(-) create mode 100644 changelog.d/8383.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8383.misc b/changelog.d/8383.misc new file mode 100644 index 0000000000..cb8318bf57 --- /dev/null +++ b/changelog.d/8383.misc @@ -0,0 +1 @@ +Refactor ID generators to use `async with` syntax. diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index c5a36990e4..ef81d73573 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -339,7 +339,7 @@ class AccountDataStore(AccountDataWorkerStore): """ content_json = json_encoder.encode(content) - with await self._account_data_id_gen.get_next() as next_id: + async with self._account_data_id_gen.get_next() as next_id: # no need to lock here as room_account_data has a unique constraint # on (user_id, room_id, account_data_type) so simple_upsert will # retry if there is a conflict. @@ -387,7 +387,7 @@ class AccountDataStore(AccountDataWorkerStore): """ content_json = json_encoder.encode(content) - with await self._account_data_id_gen.get_next() as next_id: + async with self._account_data_id_gen.get_next() as next_id: # no need to lock here as account_data has a unique constraint on # (user_id, account_data_type) so simple_upsert will retry if # there is a conflict. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index e71217a41f..d42faa3f1f 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -362,7 +362,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) rows.append((destination, stream_id, now_ms, edu_json)) txn.executemany(sql, rows) - with await self._device_inbox_id_gen.get_next() as stream_id: + async with self._device_inbox_id_gen.get_next() as stream_id: now_ms = self.clock.time_msec() await self.db_pool.runInteraction( "add_messages_to_device_inbox", add_messages_txn, now_ms, stream_id @@ -411,7 +411,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) txn, stream_id, local_messages_by_user_then_device ) - with await self._device_inbox_id_gen.get_next() as stream_id: + async with self._device_inbox_id_gen.get_next() as stream_id: now_ms = self.clock.time_msec() await self.db_pool.runInteraction( "add_messages_from_remote_to_device_inbox", diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index c04374e43d..fdf394c612 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -377,7 +377,7 @@ class DeviceWorkerStore(SQLBaseStore): THe new stream ID. """ - with await self._device_list_id_gen.get_next() as stream_id: + async with self._device_list_id_gen.get_next() as stream_id: await self.db_pool.runInteraction( "add_user_sig_change_to_streams", self._add_user_signature_change_txn, @@ -1093,7 +1093,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): if not device_ids: return - with await self._device_list_id_gen.get_next_mult( + async with self._device_list_id_gen.get_next_mult( len(device_ids) ) as stream_ids: await self.db_pool.runInteraction( @@ -1108,7 +1108,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): return stream_ids[-1] context = get_active_span_text_map() - with await self._device_list_id_gen.get_next_mult( + async with self._device_list_id_gen.get_next_mult( len(hosts) * len(device_ids) ) as stream_ids: await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index c8df0bcb3f..22e1ed15d0 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -831,7 +831,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): key (dict): the key data """ - with await self._cross_signing_id_gen.get_next() as stream_id: + async with self._cross_signing_id_gen.get_next() as stream_id: return await self.db_pool.runInteraction( "add_e2e_cross_signing_key", self._set_e2e_cross_signing_key_txn, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9a80f419e3..7723d82496 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -156,15 +156,15 @@ class PersistEventsStore: # Note: Multiple instances of this function cannot be in flight at # the same time for the same room. if backfilled: - stream_ordering_manager = await self._backfill_id_gen.get_next_mult( + stream_ordering_manager = self._backfill_id_gen.get_next_mult( len(events_and_contexts) ) else: - stream_ordering_manager = await self._stream_id_gen.get_next_mult( + stream_ordering_manager = self._stream_id_gen.get_next_mult( len(events_and_contexts) ) - with stream_ordering_manager as stream_orderings: + async with stream_ordering_manager as stream_orderings: for (event, context), stream in zip(events_and_contexts, stream_orderings): event.internal_metadata.stream_ordering = stream diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py index ccfbb2135e..7218191965 100644 --- a/synapse/storage/databases/main/group_server.py +++ b/synapse/storage/databases/main/group_server.py @@ -1265,7 +1265,7 @@ class GroupServerStore(GroupServerWorkerStore): return next_id - with await self._group_updates_id_gen.get_next() as next_id: + async with self._group_updates_id_gen.get_next() as next_id: res = await self.db_pool.runInteraction( "register_user_group_membership", _register_user_group_membership_txn, diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index c9f655dfb7..dbbb99cb95 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -23,11 +23,11 @@ from synapse.util.iterutils import batch_iter class PresenceStore(SQLBaseStore): async def update_presence(self, presence_states): - stream_ordering_manager = await self._presence_id_gen.get_next_mult( + stream_ordering_manager = self._presence_id_gen.get_next_mult( len(presence_states) ) - with stream_ordering_manager as stream_orderings: + async with stream_ordering_manager as stream_orderings: await self.db_pool.runInteraction( "update_presence", self._update_presence_txn, diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index e20a16f907..711d5aa23d 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -338,7 +338,7 @@ class PushRuleStore(PushRulesWorkerStore): ) -> None: conditions_json = json_encoder.encode(conditions) actions_json = json_encoder.encode(actions) - with await self._push_rules_stream_id_gen.get_next() as stream_id: + async with self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() if before or after: @@ -585,7 +585,7 @@ class PushRuleStore(PushRulesWorkerStore): txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE" ) - with await self._push_rules_stream_id_gen.get_next() as stream_id: + async with self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( @@ -616,7 +616,7 @@ class PushRuleStore(PushRulesWorkerStore): Raises: NotFoundError if the rule does not exist. """ - with await self._push_rules_stream_id_gen.get_next() as stream_id: + async with self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( "_set_push_rule_enabled_txn", @@ -754,7 +754,7 @@ class PushRuleStore(PushRulesWorkerStore): data={"actions": actions_json}, ) - with await self._push_rules_stream_id_gen.get_next() as stream_id: + async with self._push_rules_stream_id_gen.get_next() as stream_id: event_stream_ordering = self._stream_id_gen.get_current_token() await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index c388468273..df8609b97b 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -281,7 +281,7 @@ class PusherStore(PusherWorkerStore): last_stream_ordering, profile_tag="", ) -> None: - with await self._pushers_id_gen.get_next() as stream_id: + async with self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on # (app_id, pushkey, user_name) so simple_upsert will retry await self.db_pool.simple_upsert( @@ -344,7 +344,7 @@ class PusherStore(PusherWorkerStore): }, ) - with await self._pushers_id_gen.get_next() as stream_id: + async with self._pushers_id_gen.get_next() as stream_id: await self.db_pool.runInteraction( "delete_pusher", delete_pusher_txn, stream_id ) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index f880b5e562..c79ddff680 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -524,7 +524,7 @@ class ReceiptsStore(ReceiptsWorkerStore): "insert_receipt_conv", graph_to_linear ) - with await self._receipts_id_gen.get_next() as stream_id: + async with self._receipts_id_gen.get_next() as stream_id: event_ts = await self.db_pool.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 3ee097abf7..3c7630857f 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1137,7 +1137,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with await self._public_room_id_gen.get_next() as next_id: + async with self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "store_room_txn", store_room_txn, next_id ) @@ -1204,7 +1204,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with await self._public_room_id_gen.get_next() as next_id: + async with self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "set_room_is_public", set_room_is_public_txn, next_id ) @@ -1284,7 +1284,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): }, ) - with await self._public_room_id_gen.get_next() as next_id: + async with self._public_room_id_gen.get_next() as next_id: await self.db_pool.runInteraction( "set_room_is_public_appservice", set_room_is_public_appservice_txn, diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index 96ffe26cc9..9f120d3cb6 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -210,7 +210,7 @@ class TagsStore(TagsWorkerStore): ) self._update_revision_txn(txn, user_id, room_id, next_id) - with await self._account_data_id_gen.get_next() as next_id: + async with self._account_data_id_gen.get_next() as next_id: await self.db_pool.runInteraction("add_tag", add_tag_txn, next_id) self.get_tags_for_user.invalidate((user_id,)) @@ -232,7 +232,7 @@ class TagsStore(TagsWorkerStore): txn.execute(sql, (user_id, room_id, tag)) self._update_revision_txn(txn, user_id, room_id, next_id) - with await self._account_data_id_gen.get_next() as next_id: + async with self._account_data_id_gen.get_next() as next_id: await self.db_pool.runInteraction("remove_tag", remove_tag_txn, next_id) self.get_tags_for_user.invalidate((user_id,)) diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 1de2b91587..b0353ac2dc 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -12,14 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import contextlib import heapq import logging import threading from collections import deque -from typing import Dict, List, Set +from contextlib import contextmanager +from typing import Dict, List, Optional, Set, Union +import attr from typing_extensions import Deque from synapse.storage.database import DatabasePool, LoggingTransaction @@ -86,7 +86,7 @@ class StreamIdGenerator: upwards, -1 to grow downwards. Usage: - with await stream_id_gen.get_next() as stream_id: + async with stream_id_gen.get_next() as stream_id: # ... persist event ... """ @@ -101,10 +101,10 @@ class StreamIdGenerator: ) self._unfinished_ids = deque() # type: Deque[int] - async def get_next(self): + def get_next(self): """ Usage: - with await stream_id_gen.get_next() as stream_id: + async with stream_id_gen.get_next() as stream_id: # ... persist event ... """ with self._lock: @@ -113,7 +113,7 @@ class StreamIdGenerator: self._unfinished_ids.append(next_id) - @contextlib.contextmanager + @contextmanager def manager(): try: yield next_id @@ -121,12 +121,12 @@ class StreamIdGenerator: with self._lock: self._unfinished_ids.remove(next_id) - return manager() + return _AsyncCtxManagerWrapper(manager()) - async def get_next_mult(self, n): + def get_next_mult(self, n): """ Usage: - with await stream_id_gen.get_next(n) as stream_ids: + async with stream_id_gen.get_next(n) as stream_ids: # ... persist events ... """ with self._lock: @@ -140,7 +140,7 @@ class StreamIdGenerator: for next_id in next_ids: self._unfinished_ids.append(next_id) - @contextlib.contextmanager + @contextmanager def manager(): try: yield next_ids @@ -149,7 +149,7 @@ class StreamIdGenerator: for next_id in next_ids: self._unfinished_ids.remove(next_id) - return manager() + return _AsyncCtxManagerWrapper(manager()) def get_current_token(self): """Returns the maximum stream id such that all stream ids less than or @@ -282,59 +282,23 @@ class MultiWriterIdGenerator: def _load_next_mult_id_txn(self, txn, n: int) -> List[int]: return self._sequence_gen.get_next_mult_txn(txn, n) - async def get_next(self): + def get_next(self): """ Usage: - with await stream_id_gen.get_next() as stream_id: + async with stream_id_gen.get_next() as stream_id: # ... persist event ... """ - next_id = await self._db.runInteraction("_load_next_id", self._load_next_id_txn) - - # Assert the fetched ID is actually greater than what we currently - # believe the ID to be. If not, then the sequence and table have got - # out of sync somehow. - with self._lock: - assert self._current_positions.get(self._instance_name, 0) < next_id - - self._unfinished_ids.add(next_id) - - @contextlib.contextmanager - def manager(): - try: - # Multiply by the return factor so that the ID has correct sign. - yield self._return_factor * next_id - finally: - self._mark_id_as_finished(next_id) - return manager() + return _MultiWriterCtxManager(self) - async def get_next_mult(self, n: int): + def get_next_mult(self, n: int): """ Usage: - with await stream_id_gen.get_next_mult(5) as stream_ids: + async with stream_id_gen.get_next_mult(5) as stream_ids: # ... persist events ... """ - next_ids = await self._db.runInteraction( - "_load_next_mult_id", self._load_next_mult_id_txn, n - ) - # Assert the fetched ID is actually greater than any ID we've already - # seen. If not, then the sequence and table have got out of sync - # somehow. - with self._lock: - assert max(self._current_positions.values(), default=0) < min(next_ids) - - self._unfinished_ids.update(next_ids) - - @contextlib.contextmanager - def manager(): - try: - yield [self._return_factor * i for i in next_ids] - finally: - for i in next_ids: - self._mark_id_as_finished(i) - - return manager() + return _MultiWriterCtxManager(self, n) def get_next_txn(self, txn: LoggingTransaction): """ @@ -482,3 +446,61 @@ class MultiWriterIdGenerator: # There was a gap in seen positions, so there is nothing more to # do. break + + +@attr.s(slots=True) +class _AsyncCtxManagerWrapper: + """Helper class to convert a plain context manager to an async one. + + This is mainly useful if you have a plain context manager but the interface + requires an async one. + """ + + inner = attr.ib() + + async def __aenter__(self): + return self.inner.__enter__() + + async def __aexit__(self, exc_type, exc, tb): + return self.inner.__exit__(exc_type, exc, tb) + + +@attr.s(slots=True) +class _MultiWriterCtxManager: + """Async context manager returned by MultiWriterIdGenerator + """ + + id_gen = attr.ib(type=MultiWriterIdGenerator) + multiple_ids = attr.ib(type=Optional[int], default=None) + stream_ids = attr.ib(type=List[int], factory=list) + + async def __aenter__(self) -> Union[int, List[int]]: + self.stream_ids = await self.id_gen._db.runInteraction( + "_load_next_mult_id", + self.id_gen._load_next_mult_id_txn, + self.multiple_ids or 1, + ) + + # Assert the fetched ID is actually greater than any ID we've already + # seen. If not, then the sequence and table have got out of sync + # somehow. + with self.id_gen._lock: + assert max(self.id_gen._current_positions.values(), default=0) < min( + self.stream_ids + ) + + self.id_gen._unfinished_ids.update(self.stream_ids) + + if self.multiple_ids is None: + return self.stream_ids[0] * self.id_gen._return_factor + else: + return [i * self.id_gen._return_factor for i in self.stream_ids] + + async def __aexit__(self, exc_type, exc, tb): + for i in self.stream_ids: + self.id_gen._mark_id_as_finished(i) + + if exc_type is not None: + return False + + return False diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 20636fc400..fb8f5bc255 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -111,7 +111,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # advanced after we leave the context manager. async def _get_next_async(): - with await id_gen.get_next() as stream_id: + async with id_gen.get_next() as stream_id: self.assertEqual(stream_id, 8) self.assertEqual(id_gen.get_positions(), {"master": 7}) @@ -139,10 +139,10 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): ctx3 = self.get_success(id_gen.get_next()) ctx4 = self.get_success(id_gen.get_next()) - s1 = ctx1.__enter__() - s2 = ctx2.__enter__() - s3 = ctx3.__enter__() - s4 = ctx4.__enter__() + s1 = self.get_success(ctx1.__aenter__()) + s2 = self.get_success(ctx2.__aenter__()) + s3 = self.get_success(ctx3.__aenter__()) + s4 = self.get_success(ctx4.__aenter__()) self.assertEqual(s1, 8) self.assertEqual(s2, 9) @@ -152,22 +152,22 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(id_gen.get_positions(), {"master": 7}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) - ctx2.__exit__(None, None, None) + self.get_success(ctx2.__aexit__(None, None, None)) self.assertEqual(id_gen.get_positions(), {"master": 7}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) - ctx1.__exit__(None, None, None) + self.get_success(ctx1.__aexit__(None, None, None)) self.assertEqual(id_gen.get_positions(), {"master": 9}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 9) - ctx4.__exit__(None, None, None) + self.get_success(ctx4.__aexit__(None, None, None)) self.assertEqual(id_gen.get_positions(), {"master": 9}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 9) - ctx3.__exit__(None, None, None) + self.get_success(ctx3.__aexit__(None, None, None)) self.assertEqual(id_gen.get_positions(), {"master": 11}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 11) @@ -190,7 +190,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # advanced after we leave the context manager. async def _get_next_async(): - with await first_id_gen.get_next() as stream_id: + async with first_id_gen.get_next() as stream_id: self.assertEqual(stream_id, 8) self.assertEqual( @@ -208,7 +208,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # stream ID async def _get_next_async(): - with await second_id_gen.get_next() as stream_id: + async with second_id_gen.get_next() as stream_id: self.assertEqual(stream_id, 9) self.assertEqual( @@ -305,9 +305,13 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5}) self.assertEqual(id_gen.get_persisted_upto_position(), 3) - with self.get_success(id_gen.get_next()) as stream_id: - self.assertEqual(stream_id, 6) - self.assertEqual(id_gen.get_persisted_upto_position(), 3) + + async def _get_next_async(): + async with id_gen.get_next() as stream_id: + self.assertEqual(stream_id, 6) + self.assertEqual(id_gen.get_persisted_upto_position(), 3) + + self.get_success(_get_next_async()) self.assertEqual(id_gen.get_persisted_upto_position(), 6) @@ -373,16 +377,22 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """ id_gen = self._create_id_generator() - with self.get_success(id_gen.get_next()) as stream_id: - self._insert_row("master", stream_id) + async def _get_next_async(): + async with id_gen.get_next() as stream_id: + self._insert_row("master", stream_id) + + self.get_success(_get_next_async()) self.assertEqual(id_gen.get_positions(), {"master": -1}) self.assertEqual(id_gen.get_current_token_for_writer("master"), -1) self.assertEqual(id_gen.get_persisted_upto_position(), -1) - with self.get_success(id_gen.get_next_mult(3)) as stream_ids: - for stream_id in stream_ids: - self._insert_row("master", stream_id) + async def _get_next_async2(): + async with id_gen.get_next_mult(3) as stream_ids: + for stream_id in stream_ids: + self._insert_row("master", stream_id) + + self.get_success(_get_next_async2()) self.assertEqual(id_gen.get_positions(), {"master": -4}) self.assertEqual(id_gen.get_current_token_for_writer("master"), -4) @@ -402,18 +412,24 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): id_gen_1 = self._create_id_generator("first") id_gen_2 = self._create_id_generator("second") - with self.get_success(id_gen_1.get_next()) as stream_id: - self._insert_row("first", stream_id) - id_gen_2.advance("first", stream_id) + async def _get_next_async(): + async with id_gen_1.get_next() as stream_id: + self._insert_row("first", stream_id) + id_gen_2.advance("first", stream_id) + + self.get_success(_get_next_async()) self.assertEqual(id_gen_1.get_positions(), {"first": -1}) self.assertEqual(id_gen_2.get_positions(), {"first": -1}) self.assertEqual(id_gen_1.get_persisted_upto_position(), -1) self.assertEqual(id_gen_2.get_persisted_upto_position(), -1) - with self.get_success(id_gen_2.get_next()) as stream_id: - self._insert_row("second", stream_id) - id_gen_1.advance("second", stream_id) + async def _get_next_async2(): + async with id_gen_2.get_next() as stream_id: + self._insert_row("second", stream_id) + id_gen_1.advance("second", stream_id) + + self.get_success(_get_next_async2()) self.assertEqual(id_gen_1.get_positions(), {"first": -1, "second": -2}) self.assertEqual(id_gen_2.get_positions(), {"first": -1, "second": -2}) -- cgit 1.5.1 From f112cfe5bb2c918c9e942941686a05664d8bd7da Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 24 Sep 2020 16:53:51 +0100 Subject: Fix MultiWriteIdGenerator's handling of restarts. (#8374) On startup `MultiWriteIdGenerator` fetches the maximum stream ID for each instance from the table and uses that as its initial "current position" for each writer. This is problematic as a) it involves either a scan of events table or an index (neither of which is ideal), and b) if rows are being persisted out of order elsewhere while the process restarts then using the maximum stream ID is not correct. This could theoretically lead to race conditions where e.g. events that are persisted out of order are not sent down sync streams. We fix this by creating a new table that tracks the current positions of each writer to the stream, and update it each time we finish persisting a new entry. This is a relatively small overhead when persisting events. However for the cache invalidation stream this is a much bigger relative overhead, so instead we note that for invalidation we don't actually care about reliability over restarts (as there's no caches to invalidate) and simply don't bother reading and writing to the new table in that particular case. --- changelog.d/8374.bugfix | 1 + synapse/replication/slave/storage/_base.py | 2 + synapse/storage/databases/main/__init__.py | 8 +- synapse/storage/databases/main/events_worker.py | 4 + .../main/schema/delta/58/18stream_positions.sql | 22 +++ synapse/storage/util/id_generators.py | 148 ++++++++++++++++++--- tests/storage/test_id_generators.py | 119 +++++++++++++++-- 7 files changed, 274 insertions(+), 30 deletions(-) create mode 100644 changelog.d/8374.bugfix create mode 100644 synapse/storage/databases/main/schema/delta/58/18stream_positions.sql (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8374.bugfix b/changelog.d/8374.bugfix new file mode 100644 index 0000000000..155bc3404f --- /dev/null +++ b/changelog.d/8374.bugfix @@ -0,0 +1 @@ +Fix theoretical race condition where events are not sent down `/sync` if the synchrotron worker is restarted without restarting other workers. diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index d25fa49e1a..d0089fe06c 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -31,11 +31,13 @@ class BaseSlavedStore(CacheInvalidationWorkerStore): self._cache_id_gen = MultiWriterIdGenerator( db_conn, database, + stream_name="caches", instance_name=hs.get_instance_name(), table="cache_invalidation_stream_by_instance", instance_column="instance_name", id_column="stream_id", sequence_name="cache_invalidation_stream_seq", + writers=[], ) # type: Optional[MultiWriterIdGenerator] else: self._cache_id_gen = None diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index ccb3384db9..0cb12f4c61 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -160,14 +160,20 @@ class DataStore( ) if isinstance(self.database_engine, PostgresEngine): + # We set the `writers` to an empty list here as we don't care about + # missing updates over restarts, as we'll not have anything in our + # caches to invalidate. (This reduces the amount of writes to the DB + # that happen). self._cache_id_gen = MultiWriterIdGenerator( db_conn, database, - instance_name="master", + stream_name="caches", + instance_name=hs.get_instance_name(), table="cache_invalidation_stream_by_instance", instance_column="instance_name", id_column="stream_id", sequence_name="cache_invalidation_stream_seq", + writers=[], ) else: self._cache_id_gen = None diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index de9e8d1dc6..f95679ebc4 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -83,21 +83,25 @@ class EventsWorkerStore(SQLBaseStore): self._stream_id_gen = MultiWriterIdGenerator( db_conn=db_conn, db=database, + stream_name="events", instance_name=hs.get_instance_name(), table="events", instance_column="instance_name", id_column="stream_ordering", sequence_name="events_stream_seq", + writers=hs.config.worker.writers.events, ) self._backfill_id_gen = MultiWriterIdGenerator( db_conn=db_conn, db=database, + stream_name="backfill", instance_name=hs.get_instance_name(), table="events", instance_column="instance_name", id_column="stream_ordering", sequence_name="events_backfill_stream_seq", positive=False, + writers=hs.config.worker.writers.events, ) else: # We shouldn't be running in worker mode with SQLite, but its useful diff --git a/synapse/storage/databases/main/schema/delta/58/18stream_positions.sql b/synapse/storage/databases/main/schema/delta/58/18stream_positions.sql new file mode 100644 index 0000000000..985fd949a2 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/18stream_positions.sql @@ -0,0 +1,22 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE stream_positions ( + stream_name TEXT NOT NULL, + instance_name TEXT NOT NULL, + stream_id BIGINT NOT NULL +); + +CREATE UNIQUE INDEX stream_positions_idx ON stream_positions(stream_name, instance_name); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index b0353ac2dc..727fcc521c 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -22,6 +22,7 @@ from typing import Dict, List, Optional, Set, Union import attr from typing_extensions import Deque +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.util.sequence import PostgresSequenceGenerator @@ -184,12 +185,16 @@ class MultiWriterIdGenerator: Args: db_conn db + stream_name: A name for the stream. instance_name: The name of this instance. table: Database table associated with stream. instance_column: Column that stores the row's writer's instance name id_column: Column that stores the stream ID. sequence_name: The name of the postgres sequence used to generate new IDs. + writers: A list of known writers to use to populate current positions + on startup. Can be empty if nothing uses `get_current_token` or + `get_positions` (e.g. caches stream). positive: Whether the IDs are positive (true) or negative (false). When using negative IDs we go backwards from -1 to -2, -3, etc. """ @@ -198,16 +203,20 @@ class MultiWriterIdGenerator: self, db_conn, db: DatabasePool, + stream_name: str, instance_name: str, table: str, instance_column: str, id_column: str, sequence_name: str, + writers: List[str], positive: bool = True, ): self._db = db + self._stream_name = stream_name self._instance_name = instance_name self._positive = positive + self._writers = writers self._return_factor = 1 if positive else -1 # We lock as some functions may be called from DB threads. @@ -216,9 +225,7 @@ class MultiWriterIdGenerator: # Note: If we are a negative stream then we still store all the IDs as # positive to make life easier for us, and simply negate the IDs when we # return them. - self._current_positions = self._load_current_ids( - db_conn, table, instance_column, id_column - ) + self._current_positions = {} # type: Dict[str, int] # Set of local IDs that we're still processing. The current position # should be less than the minimum of this set (if not empty). @@ -251,30 +258,80 @@ class MultiWriterIdGenerator: self._sequence_gen = PostgresSequenceGenerator(sequence_name) + # This goes and fills out the above state from the database. + self._load_current_ids(db_conn, table, instance_column, id_column) + def _load_current_ids( self, db_conn, table: str, instance_column: str, id_column: str - ) -> Dict[str, int]: - # If positive stream aggregate via MAX. For negative stream use MIN - # *and* negate the result to get a positive number. - sql = """ - SELECT %(instance)s, %(agg)s(%(id)s) FROM %(table)s - GROUP BY %(instance)s - """ % { - "instance": instance_column, - "id": id_column, - "table": table, - "agg": "MAX" if self._positive else "-MIN", - } - + ): cur = db_conn.cursor() - cur.execute(sql) - # `cur` is an iterable over returned rows, which are 2-tuples. - current_positions = dict(cur) + # Load the current positions of all writers for the stream. + if self._writers: + sql = """ + SELECT instance_name, stream_id FROM stream_positions + WHERE stream_name = ? + """ + sql = self._db.engine.convert_param_style(sql) - cur.close() + cur.execute(sql, (self._stream_name,)) + + self._current_positions = { + instance: stream_id * self._return_factor + for instance, stream_id in cur + if instance in self._writers + } + + # We set the `_persisted_upto_position` to be the minimum of all current + # positions. If empty we use the max stream ID from the DB table. + min_stream_id = min(self._current_positions.values(), default=None) + + if min_stream_id is None: + sql = """ + SELECT COALESCE(%(agg)s(%(id)s), 1) FROM %(table)s + """ % { + "id": id_column, + "table": table, + "agg": "MAX" if self._positive else "-MIN", + } + cur.execute(sql) + (stream_id,) = cur.fetchone() + self._persisted_upto_position = stream_id + else: + # If we have a min_stream_id then we pull out everything greater + # than it from the DB so that we can prefill + # `_known_persisted_positions` and get a more accurate + # `_persisted_upto_position`. + # + # We also check if any of the later rows are from this instance, in + # which case we use that for this instance's current position. This + # is to handle the case where we didn't finish persisting to the + # stream positions table before restart (or the stream position + # table otherwise got out of date). + + sql = """ + SELECT %(instance)s, %(id)s FROM %(table)s + WHERE ? %(cmp)s %(id)s + """ % { + "id": id_column, + "table": table, + "instance": instance_column, + "cmp": "<=" if self._positive else ">=", + } + sql = self._db.engine.convert_param_style(sql) + cur.execute(sql, (min_stream_id,)) + + self._persisted_upto_position = min_stream_id + + with self._lock: + for (instance, stream_id,) in cur: + stream_id = self._return_factor * stream_id + self._add_persisted_position(stream_id) - return current_positions + if instance == self._instance_name: + self._current_positions[instance] = stream_id + + cur.close() def _load_next_id_txn(self, txn) -> int: return self._sequence_gen.get_next_id_txn(txn) @@ -316,6 +373,21 @@ class MultiWriterIdGenerator: txn.call_after(self._mark_id_as_finished, next_id) txn.call_on_exception(self._mark_id_as_finished, next_id) + # Update the `stream_positions` table with newly updated stream + # ID (unless self._writers is not set in which case we don't + # bother, as nothing will read it). + # + # We only do this on the success path so that the persisted current + # position points to a persited row with the correct instance name. + if self._writers: + txn.call_after( + run_as_background_process, + "MultiWriterIdGenerator._update_table", + self._db.runInteraction, + "MultiWriterIdGenerator._update_table", + self._update_stream_positions_table_txn, + ) + return self._return_factor * next_id def _mark_id_as_finished(self, next_id: int): @@ -447,6 +519,28 @@ class MultiWriterIdGenerator: # do. break + def _update_stream_positions_table_txn(self, txn): + """Update the `stream_positions` table with newly persisted position. + """ + + if not self._writers: + return + + # We upsert the value, ensuring on conflict that we always increase the + # value (or decrease if stream goes backwards). + sql = """ + INSERT INTO stream_positions (stream_name, instance_name, stream_id) + VALUES (?, ?, ?) + ON CONFLICT (stream_name, instance_name) + DO UPDATE SET + stream_id = %(agg)s(stream_positions.stream_id, EXCLUDED.stream_id) + """ % { + "agg": "GREATEST" if self._positive else "LEAST", + } + + pos = (self.get_current_token_for_writer(self._instance_name),) + txn.execute(sql, (self._stream_name, self._instance_name, pos)) + @attr.s(slots=True) class _AsyncCtxManagerWrapper: @@ -503,4 +597,16 @@ class _MultiWriterCtxManager: if exc_type is not None: return False + # Update the `stream_positions` table with newly updated stream + # ID (unless self._writers is not set in which case we don't + # bother, as nothing will read it). + # + # We only do this on the success path so that the persisted current + # position points to a persisted row with the correct instance name. + if self.id_gen._writers: + await self.id_gen._db.runInteraction( + "MultiWriterIdGenerator._update_table", + self.id_gen._update_stream_positions_table_txn, + ) + return False diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index fb8f5bc255..d4ff55fbff 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -43,16 +43,20 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): """ ) - def _create_id_generator(self, instance_name="master") -> MultiWriterIdGenerator: + def _create_id_generator( + self, instance_name="master", writers=["master"] + ) -> MultiWriterIdGenerator: def _create(conn): return MultiWriterIdGenerator( conn, self.db_pool, + stream_name="test_stream", instance_name=instance_name, table="foobar", instance_column="instance_name", id_column="stream_id", sequence_name="foobar_seq", + writers=writers, ) return self.get_success(self.db_pool.runWithConnection(_create)) @@ -68,6 +72,13 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): "INSERT INTO foobar VALUES (nextval('foobar_seq'), ?)", (instance_name,), ) + txn.execute( + """ + INSERT INTO stream_positions VALUES ('test_stream', ?, lastval()) + ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = lastval() + """, + (instance_name,), + ) self.get_success(self.db_pool.runInteraction("_insert_rows", _insert)) @@ -81,6 +92,13 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): "INSERT INTO foobar VALUES (?, ?)", (stream_id, instance_name,), ) txn.execute("SELECT setval('foobar_seq', ?)", (stream_id,)) + txn.execute( + """ + INSERT INTO stream_positions VALUES ('test_stream', ?, ?) + ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = ? + """, + (instance_name, stream_id, stream_id), + ) self.get_success(self.db_pool.runInteraction("_insert_row_with_id", _insert)) @@ -179,8 +197,8 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self._insert_rows("first", 3) self._insert_rows("second", 4) - first_id_gen = self._create_id_generator("first") - second_id_gen = self._create_id_generator("second") + first_id_gen = self._create_id_generator("first", writers=["first", "second"]) + second_id_gen = self._create_id_generator("second", writers=["first", "second"]) self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 7}) self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 3) @@ -262,7 +280,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self._insert_row_with_id("first", 3) self._insert_row_with_id("second", 5) - id_gen = self._create_id_generator("first") + id_gen = self._create_id_generator("first", writers=["first", "second"]) self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5}) @@ -300,7 +318,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self._insert_row_with_id("first", 3) self._insert_row_with_id("second", 5) - id_gen = self._create_id_generator("first") + id_gen = self._create_id_generator("first", writers=["first", "second"]) self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5}) @@ -319,6 +337,80 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # `persisted_upto_position` in this case, then it will be correct in the # other cases that are tested above (since they'll hit the same code). + def test_restart_during_out_of_order_persistence(self): + """Test that restarting a process while another process is writing out + of order updates are handled correctly. + """ + + # Prefill table with 7 rows written by 'master' + self._insert_rows("master", 7) + + id_gen = self._create_id_generator() + + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + + # Persist two rows at once + ctx1 = self.get_success(id_gen.get_next()) + ctx2 = self.get_success(id_gen.get_next()) + + s1 = self.get_success(ctx1.__aenter__()) + s2 = self.get_success(ctx2.__aenter__()) + + self.assertEqual(s1, 8) + self.assertEqual(s2, 9) + + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + + # We finish persisting the second row before restart + self.get_success(ctx2.__aexit__(None, None, None)) + + # We simulate a restart of another worker by just creating a new ID gen. + id_gen_worker = self._create_id_generator("worker") + + # Restarted worker should not see the second persisted row + self.assertEqual(id_gen_worker.get_positions(), {"master": 7}) + self.assertEqual(id_gen_worker.get_current_token_for_writer("master"), 7) + + # Now if we persist the first row then both instances should jump ahead + # correctly. + self.get_success(ctx1.__aexit__(None, None, None)) + + self.assertEqual(id_gen.get_positions(), {"master": 9}) + id_gen_worker.advance("master", 9) + self.assertEqual(id_gen_worker.get_positions(), {"master": 9}) + + def test_writer_config_change(self): + """Test that changing the writer config correctly works. + """ + + self._insert_row_with_id("first", 3) + self._insert_row_with_id("second", 5) + + # Initial config has two writers + id_gen = self._create_id_generator("first", writers=["first", "second"]) + self.assertEqual(id_gen.get_persisted_upto_position(), 3) + + # New config removes one of the configs. Note that if the writer is + # removed from config we assume that it has been shut down and has + # finished persisting, hence why the persisted upto position is 5. + id_gen_2 = self._create_id_generator("second", writers=["second"]) + self.assertEqual(id_gen_2.get_persisted_upto_position(), 5) + + # This config points to a single, previously unused writer. + id_gen_3 = self._create_id_generator("third", writers=["third"]) + self.assertEqual(id_gen_3.get_persisted_upto_position(), 5) + + # Check that we get a sane next stream ID with this new config. + + async def _get_next_async(): + async with id_gen_3.get_next() as stream_id: + self.assertEqual(stream_id, 6) + + self.get_success(_get_next_async()) + self.assertEqual(id_gen_3.get_persisted_upto_position(), 6) + class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """Tests MultiWriterIdGenerator that produce *negative* stream IDs. @@ -345,16 +437,20 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """ ) - def _create_id_generator(self, instance_name="master") -> MultiWriterIdGenerator: + def _create_id_generator( + self, instance_name="master", writers=["master"] + ) -> MultiWriterIdGenerator: def _create(conn): return MultiWriterIdGenerator( conn, self.db_pool, + stream_name="test_stream", instance_name=instance_name, table="foobar", instance_column="instance_name", id_column="stream_id", sequence_name="foobar_seq", + writers=writers, positive=False, ) @@ -368,6 +464,13 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): txn.execute( "INSERT INTO foobar VALUES (?, ?)", (stream_id, instance_name,), ) + txn.execute( + """ + INSERT INTO stream_positions VALUES ('test_stream', ?, ?) + ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = ? + """, + (instance_name, -stream_id, -stream_id), + ) self.get_success(self.db_pool.runInteraction("_insert_row", _insert)) @@ -409,8 +512,8 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """Tests that having multiple instances that get advanced over federation works corretly. """ - id_gen_1 = self._create_id_generator("first") - id_gen_2 = self._create_id_generator("second") + id_gen_1 = self._create_id_generator("first", writers=["first", "second"]) + id_gen_2 = self._create_id_generator("second", writers=["first", "second"]) async def _get_next_async(): async with id_gen_1.get_next() as stream_id: -- cgit 1.5.1 From 3e87d79e1c6ef894387ee2f24e008dfb8f5f853f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 25 Sep 2020 09:58:32 +0100 Subject: Fix schema delta for servers that have not backfilled (#8396) Fixes #8395. --- changelog.d/8396.feature | 1 + .../main/schema/delta/58/14events_instance_name.sql.postgres | 4 +++- synapse/storage/util/id_generators.py | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changelog.d/8396.feature (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8396.feature b/changelog.d/8396.feature new file mode 100644 index 0000000000..b363e929ea --- /dev/null +++ b/changelog.d/8396.feature @@ -0,0 +1 @@ +Add experimental support for sharding event persister. diff --git a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres index 97c1e6a0c5..c31f9af82a 100644 --- a/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres +++ b/synapse/storage/databases/main/schema/delta/58/14events_instance_name.sql.postgres @@ -21,6 +21,8 @@ SELECT setval('events_stream_seq', ( CREATE SEQUENCE IF NOT EXISTS events_backfill_stream_seq; +-- If the server has never backfilled a room then doing `-MIN(...)` will give +-- a negative result, hence why we do `GREATEST(...)` SELECT setval('events_backfill_stream_seq', ( - SELECT COALESCE(-MIN(stream_ordering), 1) FROM events + SELECT GREATEST(COALESCE(-MIN(stream_ordering), 1), 1) FROM events )); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 727fcc521c..4269eaf918 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -287,8 +287,12 @@ class MultiWriterIdGenerator: min_stream_id = min(self._current_positions.values(), default=None) if min_stream_id is None: + # We add a GREATEST here to ensure that the result is always + # positive. (This can be a problem for e.g. backfill streams where + # the server has never backfilled). sql = """ - SELECT COALESCE(%(agg)s(%(id)s), 1) FROM %(table)s + SELECT GREATEST(COALESCE(%(agg)s(%(id)s), 1), 1) + FROM %(table)s """ % { "id": id_column, "table": table, -- cgit 1.5.1 From bd380d942fdf91cf1214d6859f2bc97d12a92ab4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 28 Sep 2020 18:00:30 +0100 Subject: Add checks for postgres sequence consistency (#8402) --- changelog.d/8402.misc | 1 + docs/postgres.md | 11 ++++ synapse/storage/databases/main/registration.py | 3 + synapse/storage/databases/state/store.py | 3 + synapse/storage/util/id_generators.py | 5 ++ synapse/storage/util/sequence.py | 90 +++++++++++++++++++++++++- tests/storage/test_id_generators.py | 22 ++++++- tests/unittest.py | 31 ++++++++- 8 files changed, 160 insertions(+), 6 deletions(-) create mode 100644 changelog.d/8402.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8402.misc b/changelog.d/8402.misc new file mode 100644 index 0000000000..ad1804d207 --- /dev/null +++ b/changelog.d/8402.misc @@ -0,0 +1 @@ +Add checks on startup that PostgreSQL sequences are consistent with their associated tables. diff --git a/docs/postgres.md b/docs/postgres.md index e71a1975d8..c30cc1fd8c 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -106,6 +106,17 @@ Note that the above may fail with an error about duplicate rows if corruption has already occurred, and such duplicate rows will need to be manually removed. +## Fixing inconsistent sequences error + +Synapse uses Postgres sequences to generate IDs for various tables. A sequence +and associated table can get out of sync if, for example, Synapse has been +downgraded and then upgraded again. + +To fix the issue shut down Synapse (including any and all workers) and run the +SQL command included in the error message. Once done Synapse should start +successfully. + + ## Tuning Postgres The default settings should be fine for most deployments. For larger diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 48ce7ecd16..a83df7759d 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -41,6 +41,9 @@ class RegistrationWorkerStore(SQLBaseStore): self.config = hs.config self.clock = hs.get_clock() + # Note: we don't check this sequence for consistency as we'd have to + # call `find_max_generated_user_id_localpart` each time, which is + # expensive if there are many entries. self._user_id_seq = build_sequence_generator( database.engine, find_max_generated_user_id_localpart, "user_id_seq", ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index bec3780a32..989f0cbc9d 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -99,6 +99,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self._state_group_seq_gen = build_sequence_generator( self.database_engine, get_max_state_group_txn, "state_group_id_seq" ) + self._state_group_seq_gen.check_consistency( + db_conn, table="state_groups", id_column="id" + ) @cached(max_entries=10000, iterable=True) async def get_state_group_delta(self, state_group): diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 4269eaf918..4fd7573e26 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -258,6 +258,11 @@ class MultiWriterIdGenerator: self._sequence_gen = PostgresSequenceGenerator(sequence_name) + # We check that the table and sequence haven't diverged. + self._sequence_gen.check_consistency( + db_conn, table=table, id_column=id_column, positive=positive + ) + # This goes and fills out the above state from the database. self._load_current_ids(db_conn, table, instance_column, id_column) diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py index ffc1894748..2dd95e2709 100644 --- a/synapse/storage/util/sequence.py +++ b/synapse/storage/util/sequence.py @@ -13,11 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License. import abc +import logging import threading from typing import Callable, List, Optional -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine -from synapse.storage.types import Cursor +from synapse.storage.engines import ( + BaseDatabaseEngine, + IncorrectDatabaseSetup, + PostgresEngine, +) +from synapse.storage.types import Connection, Cursor + +logger = logging.getLogger(__name__) + + +_INCONSISTENT_SEQUENCE_ERROR = """ +Postgres sequence '%(seq)s' is inconsistent with associated +table '%(table)s'. This can happen if Synapse has been downgraded and +then upgraded again, or due to a bad migration. + +To fix this error, shut down Synapse (including any and all workers) +and run the following SQL: + + SELECT setval('%(seq)s', ( + %(max_id_sql)s + )); + +See docs/postgres.md for more information. +""" class SequenceGenerator(metaclass=abc.ABCMeta): @@ -28,6 +51,19 @@ class SequenceGenerator(metaclass=abc.ABCMeta): """Gets the next ID in the sequence""" ... + @abc.abstractmethod + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + """Should be called during start up to test that the current value of + the sequence is greater than or equal to the maximum ID in the table. + + This is to handle various cases where the sequence value can get out + of sync with the table, e.g. if Synapse gets rolled back to a previous + version and the rolled forwards again. + """ + ... + class PostgresSequenceGenerator(SequenceGenerator): """An implementation of SequenceGenerator which uses a postgres sequence""" @@ -45,6 +81,50 @@ class PostgresSequenceGenerator(SequenceGenerator): ) return [i for (i,) in txn] + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + txn = db_conn.cursor() + + # First we get the current max ID from the table. + table_sql = "SELECT GREATEST(%(agg)s(%(id)s), 0) FROM %(table)s" % { + "id": id_column, + "table": table, + "agg": "MAX" if positive else "-MIN", + } + + txn.execute(table_sql) + row = txn.fetchone() + if not row: + # Table is empty, so nothing to do. + txn.close() + return + + # Now we fetch the current value from the sequence and compare with the + # above. + max_stream_id = row[0] + txn.execute( + "SELECT last_value, is_called FROM %(seq)s" % {"seq": self._sequence_name} + ) + last_value, is_called = txn.fetchone() + txn.close() + + # If `is_called` is False then `last_value` is actually the value that + # will be generated next, so we decrement to get the true "last value". + if not is_called: + last_value -= 1 + + if max_stream_id > last_value: + logger.warning( + "Postgres sequence %s is behind table %s: %d < %d", + last_value, + max_stream_id, + ) + raise IncorrectDatabaseSetup( + _INCONSISTENT_SEQUENCE_ERROR + % {"seq": self._sequence_name, "table": table, "max_id_sql": table_sql} + ) + GetFirstCallbackType = Callable[[Cursor], int] @@ -81,6 +161,12 @@ class LocalSequenceGenerator(SequenceGenerator): self._current_max_id += 1 return self._current_max_id + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + # There is nothing to do for in memory sequences + pass + def build_sequence_generator( database_engine: BaseDatabaseEngine, diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index d4ff55fbff..4558bee7be 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -12,9 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - from synapse.storage.database import DatabasePool +from synapse.storage.engines import IncorrectDatabaseSetup from synapse.storage.util.id_generators import MultiWriterIdGenerator from tests.unittest import HomeserverTestCase @@ -59,7 +58,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): writers=writers, ) - return self.get_success(self.db_pool.runWithConnection(_create)) + return self.get_success_or_raise(self.db_pool.runWithConnection(_create)) def _insert_rows(self, instance_name: str, number: int): """Insert N rows as the given instance, inserting with stream IDs pulled @@ -411,6 +410,23 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.get_success(_get_next_async()) self.assertEqual(id_gen_3.get_persisted_upto_position(), 6) + def test_sequence_consistency(self): + """Test that we error out if the table and sequence diverges. + """ + + # Prefill with some rows + self._insert_row_with_id("master", 3) + + # Now we add a row *without* updating the stream ID + def _insert(txn): + txn.execute("INSERT INTO foobar VALUES (26, 'master')") + + self.get_success(self.db_pool.runInteraction("_insert", _insert)) + + # Creating the ID gen should error + with self.assertRaises(IncorrectDatabaseSetup): + self._create_id_generator("first") + class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """Tests MultiWriterIdGenerator that produce *negative* stream IDs. diff --git a/tests/unittest.py b/tests/unittest.py index bbe50c3851..e654c0442d 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import gc import hashlib import hmac @@ -28,6 +27,7 @@ from mock import Mock, patch from canonicaljson import json from twisted.internet.defer import Deferred, ensureDeferred, succeed +from twisted.python.failure import Failure from twisted.python.threadpool import ThreadPool from twisted.trial import unittest @@ -476,6 +476,35 @@ class HomeserverTestCase(TestCase): self.pump() return self.failureResultOf(d, exc) + def get_success_or_raise(self, d, by=0.0): + """Drive deferred to completion and return result or raise exception + on failure. + """ + + if inspect.isawaitable(d): + deferred = ensureDeferred(d) + if not isinstance(deferred, Deferred): + return d + + results = [] # type: list + deferred.addBoth(results.append) + + self.pump(by=by) + + if not results: + self.fail( + "Success result expected on {!r}, found no result instead".format( + deferred + ) + ) + + result = results[0] + + if isinstance(result, Failure): + result.raiseException() + + return result + def register_user(self, username, password, admin=False): """ Register a user. Requires the Admin API be registered. -- cgit 1.5.1 From b1433bf231370636b817ffa01e6cda5a567cfafe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Sep 2020 16:42:19 +0100 Subject: Don't table scan events on worker startup (#8419) * Fix table scan of events on worker startup. This happened because we assumed "new" writers had an initial stream position of 0, so the replication code tried to fetch all events written by the instance between 0 and the current position. Instead, set the initial position of new writers to the current persisted up to position, on the assumption that new writers won't have written anything before that point. * Consider old writers coming back as "new". Otherwise we'd try and fetch entries between the old stale token and the current position, even though it won't have written any rows. Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/8419.feature | 1 + synapse/storage/util/id_generators.py | 26 +++++++++++++++++++++++++- tests/storage/test_id_generators.py | 18 ++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8419.feature (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8419.feature b/changelog.d/8419.feature new file mode 100644 index 0000000000..b363e929ea --- /dev/null +++ b/changelog.d/8419.feature @@ -0,0 +1 @@ +Add experimental support for sharding event persister. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 4fd7573e26..02fbb656e8 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -273,6 +273,19 @@ class MultiWriterIdGenerator: # Load the current positions of all writers for the stream. if self._writers: + # We delete any stale entries in the positions table. This is + # important if we add back a writer after a long time; we want to + # consider that a "new" writer, rather than using the old stale + # entry here. + sql = """ + DELETE FROM stream_positions + WHERE + stream_name = ? + AND instance_name != ALL(?) + """ + sql = self._db.engine.convert_param_style(sql) + cur.execute(sql, (self._stream_name, self._writers)) + sql = """ SELECT instance_name, stream_id FROM stream_positions WHERE stream_name = ? @@ -453,11 +466,22 @@ class MultiWriterIdGenerator: """Returns the position of the given writer. """ + # If we don't have an entry for the given instance name, we assume it's a + # new writer. + # + # For new writers we assume their initial position to be the current + # persisted up to position. This stops Synapse from doing a full table + # scan when a new writer announces itself over replication. with self._lock: - return self._return_factor * self._current_positions.get(instance_name, 0) + return self._return_factor * self._current_positions.get( + instance_name, self._persisted_upto_position + ) def get_positions(self) -> Dict[str, int]: """Get a copy of the current positon map. + + Note that this won't necessarily include all configured writers if some + writers haven't written anything yet. """ with self._lock: diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 4558bee7be..392b08832b 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -390,17 +390,28 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # Initial config has two writers id_gen = self._create_id_generator("first", writers=["first", "second"]) self.assertEqual(id_gen.get_persisted_upto_position(), 3) + self.assertEqual(id_gen.get_current_token_for_writer("first"), 3) + self.assertEqual(id_gen.get_current_token_for_writer("second"), 5) # New config removes one of the configs. Note that if the writer is # removed from config we assume that it has been shut down and has # finished persisting, hence why the persisted upto position is 5. id_gen_2 = self._create_id_generator("second", writers=["second"]) self.assertEqual(id_gen_2.get_persisted_upto_position(), 5) + self.assertEqual(id_gen_2.get_current_token_for_writer("second"), 5) # This config points to a single, previously unused writer. id_gen_3 = self._create_id_generator("third", writers=["third"]) self.assertEqual(id_gen_3.get_persisted_upto_position(), 5) + # For new writers we assume their initial position to be the current + # persisted up to position. This stops Synapse from doing a full table + # scan when a new writer comes along. + self.assertEqual(id_gen_3.get_current_token_for_writer("third"), 5) + + id_gen_4 = self._create_id_generator("fourth", writers=["third"]) + self.assertEqual(id_gen_4.get_current_token_for_writer("third"), 5) + # Check that we get a sane next stream ID with this new config. async def _get_next_async(): @@ -410,6 +421,13 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.get_success(_get_next_async()) self.assertEqual(id_gen_3.get_persisted_upto_position(), 6) + # If we add back the old "first" then we shouldn't see the persisted up + # to position revert back to 3. + id_gen_5 = self._create_id_generator("five", writers=["first", "third"]) + self.assertEqual(id_gen_5.get_persisted_upto_position(), 6) + self.assertEqual(id_gen_5.get_current_token_for_writer("first"), 6) + self.assertEqual(id_gen_5.get_current_token_for_writer("third"), 6) + def test_sequence_consistency(self): """Test that we error out if the table and sequence diverges. """ -- cgit 1.5.1 From 695240d34a9dd1c34379ded1fbbbe42a1850549e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Oct 2020 12:22:19 +0100 Subject: Fix DB query on startup for negative streams. (#8447) For negative streams we have to negate the internal stream ID before querying the DB. The effect of this bug was to query far too many rows, slowing start up time, but we would correctly filter the results afterwards so there was no ill effect. --- changelog.d/8447.bugfix | 1 + synapse/storage/util/id_generators.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8447.bugfix (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8447.bugfix b/changelog.d/8447.bugfix new file mode 100644 index 0000000000..88edaf322e --- /dev/null +++ b/changelog.d/8447.bugfix @@ -0,0 +1 @@ +Fix DB query on startup for negative streams which caused long start up times. Introduced in #8374. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 02fbb656e8..48efbb5067 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -341,7 +341,7 @@ class MultiWriterIdGenerator: "cmp": "<=" if self._positive else ">=", } sql = self._db.engine.convert_param_style(sql) - cur.execute(sql, (min_stream_id,)) + cur.execute(sql, (min_stream_id * self._return_factor,)) self._persisted_upto_position = min_stream_id -- cgit 1.5.1 From fa8934b175467d589dd34fae18639cac0d738fc9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Oct 2020 15:15:57 +0100 Subject: Reduce serialization errors in MultiWriterIdGen (#8456) We call `_update_stream_positions_table_txn` a lot, which is an UPSERT that can conflict in `REPEATABLE READ` isolation level. Instead of doing a transaction consisting of a single query we may as well run it outside of a transaction. --- changelog.d/8456.misc | 1 + synapse/storage/database.py | 63 +++++++++++++++++++++++++++++++++-- synapse/storage/engines/_base.py | 17 ++++++++++ synapse/storage/engines/postgres.py | 10 +++++- synapse/storage/engines/sqlite.py | 10 ++++++ synapse/storage/util/id_generators.py | 12 ++++++- tests/storage/test_base.py | 1 + 7 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 changelog.d/8456.misc (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8456.misc b/changelog.d/8456.misc new file mode 100644 index 0000000000..ccd260069b --- /dev/null +++ b/changelog.d/8456.misc @@ -0,0 +1 @@ +Reduce number of serialization errors of `MultiWriterIdGenerator._update_table`. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 79ec8f119d..6116191b16 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -403,6 +403,24 @@ class DatabasePool: *args: Any, **kwargs: Any ) -> R: + """Start a new database transaction with the given connection. + + Note: The given func may be called multiple times under certain + failure modes. This is normally fine when in a standard transaction, + but care must be taken if the connection is in `autocommit` mode that + the function will correctly handle being aborted and retried half way + through its execution. + + Args: + conn + desc + after_callbacks + exception_callbacks + func + *args + **kwargs + """ + start = monotonic_time() txn_id = self._TXN_ID @@ -508,7 +526,12 @@ class DatabasePool: sql_txn_timer.labels(desc).observe(duration) async def runInteraction( - self, desc: str, func: "Callable[..., R]", *args: Any, **kwargs: Any + self, + desc: str, + func: "Callable[..., R]", + *args: Any, + db_autocommit: bool = False, + **kwargs: Any ) -> R: """Starts a transaction on the database and runs a given function @@ -518,6 +541,18 @@ class DatabasePool: database transaction (twisted.enterprise.adbapi.Transaction) as its first argument, followed by `args` and `kwargs`. + db_autocommit: Whether to run the function in "autocommit" mode, + i.e. outside of a transaction. This is useful for transactions + that are only a single query. + + Currently, this is only implemented for Postgres. SQLite will still + run the function inside a transaction. + + WARNING: This means that if func fails half way through then + the changes will *not* be rolled back. `func` may also get + called multiple times if the transaction is retried, so must + correctly handle that case. + args: positional args to pass to `func` kwargs: named args to pass to `func` @@ -538,6 +573,7 @@ class DatabasePool: exception_callbacks, func, *args, + db_autocommit=db_autocommit, **kwargs ) @@ -551,7 +587,11 @@ class DatabasePool: return cast(R, result) async def runWithConnection( - self, func: "Callable[..., R]", *args: Any, **kwargs: Any + self, + func: "Callable[..., R]", + *args: Any, + db_autocommit: bool = False, + **kwargs: Any ) -> R: """Wraps the .runWithConnection() method on the underlying db_pool. @@ -560,6 +600,9 @@ class DatabasePool: database connection (twisted.enterprise.adbapi.Connection) as its first argument, followed by `args` and `kwargs`. args: positional args to pass to `func` + db_autocommit: Whether to run the function in "autocommit" mode, + i.e. outside of a transaction. This is useful for transaction + that are only a single query. Currently only affects postgres. kwargs: named args to pass to `func` Returns: @@ -575,6 +618,13 @@ class DatabasePool: start_time = monotonic_time() def inner_func(conn, *args, **kwargs): + # We shouldn't be in a transaction. If we are then something + # somewhere hasn't committed after doing work. (This is likely only + # possible during startup, as `run*` will ensure changes are + # committed/rolled back before putting the connection back in the + # pool). + assert not self.engine.in_transaction(conn) + with LoggingContext("runWithConnection", parent_context) as context: sched_duration_sec = monotonic_time() - start_time sql_scheduling_timer.observe(sched_duration_sec) @@ -584,7 +634,14 @@ class DatabasePool: logger.debug("Reconnecting closed database connection") conn.reconnect() - return func(conn, *args, **kwargs) + try: + if db_autocommit: + self.engine.attempt_to_set_autocommit(conn, True) + + return func(conn, *args, **kwargs) + finally: + if db_autocommit: + self.engine.attempt_to_set_autocommit(conn, False) return await make_deferred_yieldable( self._db_pool.runWithConnection(inner_func, *args, **kwargs) diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index 908cbc79e3..d6d632dc10 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -97,3 +97,20 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): """Gets a string giving the server version. For example: '3.22.0' """ ... + + @abc.abstractmethod + def in_transaction(self, conn: Connection) -> bool: + """Whether the connection is currently in a transaction. + """ + ... + + @abc.abstractmethod + def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): + """Attempt to set the connections autocommit mode. + + When True queries are run outside of transactions. + + Note: This has no effect on SQLite3, so callers still need to + commit/rollback the connections. + """ + ... diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index ff39281f85..7719ac32f7 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -15,7 +15,8 @@ import logging -from ._base import BaseDatabaseEngine, IncorrectDatabaseSetup +from synapse.storage.engines._base import BaseDatabaseEngine, IncorrectDatabaseSetup +from synapse.storage.types import Connection logger = logging.getLogger(__name__) @@ -119,6 +120,7 @@ class PostgresEngine(BaseDatabaseEngine): cursor.execute("SET synchronous_commit TO OFF") cursor.close() + db_conn.commit() @property def can_native_upsert(self): @@ -171,3 +173,9 @@ class PostgresEngine(BaseDatabaseEngine): return "%i.%i" % (numver / 10000, numver % 10000) else: return "%i.%i.%i" % (numver / 10000, (numver % 10000) / 100, numver % 100) + + def in_transaction(self, conn: Connection) -> bool: + return conn.status != self.module.extensions.STATUS_READY # type: ignore + + def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): + return conn.set_session(autocommit=autocommit) # type: ignore diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 8a0f8c89d1..5db0f0b520 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -17,6 +17,7 @@ import threading import typing from synapse.storage.engines import BaseDatabaseEngine +from synapse.storage.types import Connection if typing.TYPE_CHECKING: import sqlite3 # noqa: F401 @@ -86,6 +87,7 @@ class Sqlite3Engine(BaseDatabaseEngine["sqlite3.Connection"]): db_conn.create_function("rank", 1, _rank) db_conn.execute("PRAGMA foreign_keys = ON;") + db_conn.commit() def is_deadlock(self, error): return False @@ -105,6 +107,14 @@ class Sqlite3Engine(BaseDatabaseEngine["sqlite3.Connection"]): """ return "%i.%i.%i" % self.module.sqlite_version_info + def in_transaction(self, conn: Connection) -> bool: + return conn.in_transaction # type: ignore + + def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): + # Twisted doesn't let us set attributes on the connections, so we can't + # set the connection to autocommit mode. + pass + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 48efbb5067..ad017207aa 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -24,6 +24,7 @@ from typing_extensions import Deque from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.types import Cursor from synapse.storage.util.sequence import PostgresSequenceGenerator logger = logging.getLogger(__name__) @@ -552,7 +553,7 @@ class MultiWriterIdGenerator: # do. break - def _update_stream_positions_table_txn(self, txn): + def _update_stream_positions_table_txn(self, txn: Cursor): """Update the `stream_positions` table with newly persisted position. """ @@ -602,10 +603,13 @@ class _MultiWriterCtxManager: stream_ids = attr.ib(type=List[int], factory=list) async def __aenter__(self) -> Union[int, List[int]]: + # It's safe to run this in autocommit mode as fetching values from a + # sequence ignores transaction semantics anyway. self.stream_ids = await self.id_gen._db.runInteraction( "_load_next_mult_id", self.id_gen._load_next_mult_id_txn, self.multiple_ids or 1, + db_autocommit=True, ) # Assert the fetched ID is actually greater than any ID we've already @@ -636,10 +640,16 @@ class _MultiWriterCtxManager: # # We only do this on the success path so that the persisted current # position points to a persisted row with the correct instance name. + # + # We do this in autocommit mode as a) the upsert works correctly outside + # transactions and b) reduces the amount of time the rows are locked + # for. If we don't do this then we'll often hit serialization errors due + # to the fact we default to REPEATABLE READ isolation levels. if self.id_gen._writers: await self.id_gen._db.runInteraction( "MultiWriterIdGenerator._update_table", self.id_gen._update_stream_positions_table_txn, + db_autocommit=True, ) return False diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index 40ba652248..eac7e4dcd2 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -56,6 +56,7 @@ class SQLBaseStoreTestCase(unittest.TestCase): engine = create_engine(sqlite_config) fake_engine = Mock(wraps=engine) fake_engine.can_native_upsert = False + fake_engine.in_transaction.return_value = False db = DatabasePool(Mock(), Mock(config=sqlite_config), fake_engine) db._db_pool = self.db_pool -- cgit 1.5.1 From 20fa83f3744b25e513fdc904261c87c324bbc87e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2020 15:40:06 +0100 Subject: Remove racey assertion in MultiWriterIDGenerator (#8530) We asserted that the IDs returned by postgres sequence was greater than any we had seen, however this is technically racey as we may update the current positions out of order. We now assert that the sequences are correct on startup, so the assertion is no longer really required, so we remove them. --- changelog.d/8530.bugfix | 1 + synapse/storage/util/id_generators.py | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) create mode 100644 changelog.d/8530.bugfix (limited to 'synapse/storage/util/id_generators.py') diff --git a/changelog.d/8530.bugfix b/changelog.d/8530.bugfix new file mode 100644 index 0000000000..443d88424e --- /dev/null +++ b/changelog.d/8530.bugfix @@ -0,0 +1 @@ +Fix rare bug where sending an event would fail due to a racey assertion. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index ad017207aa..eccd2d5b7b 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -612,14 +612,7 @@ class _MultiWriterCtxManager: db_autocommit=True, ) - # Assert the fetched ID is actually greater than any ID we've already - # seen. If not, then the sequence and table have got out of sync - # somehow. with self.id_gen._lock: - assert max(self.id_gen._current_positions.values(), default=0) < min( - self.stream_ids - ) - self.id_gen._unfinished_ids.update(self.stream_ids) if self.multiple_ids is None: -- cgit 1.5.1