From fe8ed1b46f781faa45d1bba8f9308cf47c42010f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Sep 2020 12:22:55 +0100 Subject: Make `StreamToken.room_key` be a `RoomStreamToken` instance. (#8281) --- synapse/storage/persist_events.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'synapse/storage/persist_events.py') diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index dbaeef91dd..d89f6ed128 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -18,7 +18,7 @@ import itertools import logging from collections import deque, namedtuple -from typing import Iterable, List, Optional, Set, Tuple +from typing import Dict, Iterable, List, Optional, Set, Tuple from prometheus_client import Counter, Histogram @@ -31,7 +31,7 @@ from synapse.logging.context import PreserveLoggingContext, make_deferred_yielda from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState -from synapse.types import StateMap +from synapse.types import Collection, StateMap from synapse.util.async_helpers import ObservableDeferred from synapse.util.metrics import Measure @@ -185,6 +185,8 @@ class EventsPersistenceStorage: # store for now. self.main_store = stores.main self.state_store = stores.state + + assert stores.persist_events self.persist_events_store = stores.persist_events self._clock = hs.get_clock() @@ -208,7 +210,7 @@ class EventsPersistenceStorage: Returns: the stream ordering of the latest persisted event """ - partitioned = {} + partitioned = {} # type: Dict[str, List[Tuple[EventBase, EventContext]]] for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) @@ -305,7 +307,9 @@ class EventsPersistenceStorage: # Work out the new "current state" for each room. # We do this by working out what the new extremities are and then # calculating the state from that. - events_by_room = {} + events_by_room = ( + {} + ) # type: Dict[str, List[Tuple[EventBase, EventContext]]] for event, context in chunk: events_by_room.setdefault(event.room_id, []).append( (event, context) @@ -436,7 +440,7 @@ class EventsPersistenceStorage: self, room_id: str, event_contexts: List[Tuple[EventBase, EventContext]], - latest_event_ids: List[str], + latest_event_ids: Collection[str], ): """Calculates the new forward extremities for a room given events to persist. @@ -470,7 +474,7 @@ class EventsPersistenceStorage: # Remove any events which are prev_events of any existing events. existing_prevs = await self.persist_events_store._get_events_which_are_prevs( result - ) + ) # type: Collection[str] result.difference_update(existing_prevs) # Finally handle the case where the new events have soft-failed prev -- cgit 1.5.1 From ac11fcbbb8ccfeb4c72b5aae9faef28469109277 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 24 Sep 2020 13:24:17 +0100 Subject: Add EventStreamPosition type (#8388) The idea is to remove some of the places we pass around `int`, where it can represent one of two things: 1. the position of an event in the stream; or 2. a token that partitions the stream, used as part of the stream tokens. The valid operations are then: 1. did a position happen before or after a token; 2. get all events that happened before or after a token; and 3. get all events between two tokens. (Note that we don't want to allow other operations as we want to change the tokens to be vector clocks rather than simple ints) --- changelog.d/8388.misc | 1 + synapse/handlers/federation.py | 16 +++++--- synapse/handlers/message.py | 6 +-- synapse/handlers/sync.py | 10 ++--- synapse/notifier.py | 55 ++++++++++++++------------ synapse/replication/tcp/client.py | 12 ++++-- synapse/storage/databases/main/roommember.py | 14 ++++--- synapse/storage/persist_events.py | 14 ++++--- synapse/storage/roommember.py | 2 +- synapse/types.py | 15 +++++++ tests/replication/slave/storage/test_events.py | 12 ++++-- 11 files changed, 100 insertions(+), 57 deletions(-) create mode 100644 changelog.d/8388.misc (limited to 'synapse/storage/persist_events.py') diff --git a/changelog.d/8388.misc b/changelog.d/8388.misc new file mode 100644 index 0000000000..aaaef88b66 --- /dev/null +++ b/changelog.d/8388.misc @@ -0,0 +1 @@ +Add `EventStreamPosition` type. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ea9264e751..9f773aefa7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -74,6 +74,8 @@ from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( JsonDict, MutableStateMap, + PersistedEventPosition, + RoomStreamToken, StateMap, UserID, get_domain_from_id, @@ -2956,7 +2958,7 @@ class FederationHandler(BaseHandler): ) return result["max_stream_id"] else: - max_stream_id = await self.storage.persistence.persist_events( + max_stream_token = await self.storage.persistence.persist_events( event_and_contexts, backfilled=backfilled ) @@ -2967,12 +2969,12 @@ class FederationHandler(BaseHandler): if not backfilled: # Never notify for backfilled events for event, _ in event_and_contexts: - await self._notify_persisted_event(event, max_stream_id) + await self._notify_persisted_event(event, max_stream_token) - return max_stream_id + return max_stream_token.stream async def _notify_persisted_event( - self, event: EventBase, max_stream_id: int + self, event: EventBase, max_stream_token: RoomStreamToken ) -> None: """Checks to see if notifier/pushers should be notified about the event or not. @@ -2998,9 +3000,11 @@ class FederationHandler(BaseHandler): elif event.internal_metadata.is_outlier(): return - event_stream_id = event.internal_metadata.stream_ordering + event_pos = PersistedEventPosition( + self._instance_name, event.internal_metadata.stream_ordering + ) self.notifier.on_new_room_event( - event, event_stream_id, max_stream_id, extra_users=extra_users + event, event_pos, max_stream_token, extra_users=extra_users ) async def _clean_room_for_join(self, room_id: str) -> None: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6ee559fd1d..ee271e85e5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1138,7 +1138,7 @@ class EventCreationHandler: if prev_state_ids: raise AuthError(403, "Changing the room create event is forbidden") - event_stream_id, max_stream_id = await self.storage.persistence.persist_event( + event_pos, max_stream_token = await self.storage.persistence.persist_event( event, context=context ) @@ -1149,7 +1149,7 @@ class EventCreationHandler: def _notify(): try: self.notifier.on_new_room_event( - event, event_stream_id, max_stream_id, extra_users=extra_users + event, event_pos, max_stream_token, extra_users=extra_users ) except Exception: logger.exception("Error notifying about new room event") @@ -1161,7 +1161,7 @@ class EventCreationHandler: # matters as sometimes presence code can take a while. run_in_background(self._bump_active_time, requester.user) - return event_stream_id + return event_pos.stream async def _bump_active_time(self, user: UserID) -> None: try: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9b3a4f638b..e948efef2e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -967,7 +967,7 @@ class SyncHandler: raise NotImplementedError() else: joined_room_ids = await self.get_rooms_for_user_at( - user_id, now_token.room_stream_id + user_id, now_token.room_key ) sync_result_builder = SyncResultBuilder( sync_config, @@ -1916,7 +1916,7 @@ class SyncHandler: raise Exception("Unrecognized rtype: %r", room_builder.rtype) async def get_rooms_for_user_at( - self, user_id: str, stream_ordering: int + self, user_id: str, room_key: RoomStreamToken ) -> FrozenSet[str]: """Get set of joined rooms for a user at the given stream ordering. @@ -1942,15 +1942,15 @@ class SyncHandler: # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. - for room_id, membership_stream_ordering in joined_rooms: - if membership_stream_ordering <= stream_ordering: + for room_id, event_pos in joined_rooms: + if not event_pos.persisted_after(room_key): joined_room_ids.add(room_id) continue logger.info("User joined room after current token: %s", room_id) extrems = await self.store.get_forward_extremeties_for_room( - room_id, stream_ordering + room_id, event_pos.stream ) users_in_room = await self.state.get_current_users_in_room(room_id, extrems) if user_id in users_in_room: diff --git a/synapse/notifier.py b/synapse/notifier.py index a8fd3ef886..441b3d15e2 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -42,7 +42,13 @@ from synapse.logging.utils import log_function from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.streams.config import PaginationConfig -from synapse.types import Collection, RoomStreamToken, StreamToken, UserID +from synapse.types import ( + Collection, + PersistedEventPosition, + RoomStreamToken, + StreamToken, + UserID, +) from synapse.util.async_helpers import ObservableDeferred, timeout_deferred from synapse.util.metrics import Measure from synapse.visibility import filter_events_for_client @@ -187,7 +193,7 @@ class Notifier: self.store = hs.get_datastore() self.pending_new_room_events = ( [] - ) # type: List[Tuple[int, EventBase, Collection[UserID]]] + ) # type: List[Tuple[PersistedEventPosition, EventBase, Collection[UserID]]] # Called when there are new things to stream over replication self.replication_callbacks = [] # type: List[Callable[[], None]] @@ -246,8 +252,8 @@ class Notifier: def on_new_room_event( self, event: EventBase, - room_stream_id: int, - max_room_stream_id: int, + event_pos: PersistedEventPosition, + max_room_stream_token: RoomStreamToken, extra_users: Collection[UserID] = [], ): """ Used by handlers to inform the notifier something has happened @@ -261,16 +267,16 @@ class Notifier: until all previous events have been persisted before notifying the client streams. """ - self.pending_new_room_events.append((room_stream_id, event, extra_users)) - self._notify_pending_new_room_events(max_room_stream_id) + self.pending_new_room_events.append((event_pos, event, extra_users)) + self._notify_pending_new_room_events(max_room_stream_token) self.notify_replication() - def _notify_pending_new_room_events(self, max_room_stream_id: int): + def _notify_pending_new_room_events(self, max_room_stream_token: RoomStreamToken): """Notify for the room events that were queued waiting for a previous event to be persisted. Args: - max_room_stream_id: The highest stream_id below which all + max_room_stream_token: The highest stream_id below which all events have been persisted. """ pending = self.pending_new_room_events @@ -279,11 +285,9 @@ class Notifier: users = set() # type: Set[UserID] rooms = set() # type: Set[str] - for room_stream_id, event, extra_users in pending: - if room_stream_id > max_room_stream_id: - self.pending_new_room_events.append( - (room_stream_id, event, extra_users) - ) + for event_pos, event, extra_users in pending: + if event_pos.persisted_after(max_room_stream_token): + self.pending_new_room_events.append((event_pos, event, extra_users)) else: if ( event.type == EventTypes.Member @@ -296,39 +300,38 @@ class Notifier: if users or rooms: self.on_new_event( - "room_key", - RoomStreamToken(None, max_room_stream_id), - users=users, - rooms=rooms, + "room_key", max_room_stream_token, users=users, rooms=rooms, ) - self._on_updated_room_token(max_room_stream_id) + self._on_updated_room_token(max_room_stream_token) - def _on_updated_room_token(self, max_room_stream_id: int): + def _on_updated_room_token(self, max_room_stream_token: RoomStreamToken): """Poke services that might care that the room position has been updated. """ # poke any interested application service. run_as_background_process( - "_notify_app_services", self._notify_app_services, max_room_stream_id + "_notify_app_services", self._notify_app_services, max_room_stream_token ) run_as_background_process( - "_notify_pusher_pool", self._notify_pusher_pool, max_room_stream_id + "_notify_pusher_pool", self._notify_pusher_pool, max_room_stream_token ) if self.federation_sender: - self.federation_sender.notify_new_events(max_room_stream_id) + self.federation_sender.notify_new_events(max_room_stream_token.stream) - async def _notify_app_services(self, max_room_stream_id: int): + async def _notify_app_services(self, max_room_stream_token: RoomStreamToken): try: - await self.appservice_handler.notify_interested_services(max_room_stream_id) + await self.appservice_handler.notify_interested_services( + max_room_stream_token.stream + ) except Exception: logger.exception("Error notifying application services of event") - async def _notify_pusher_pool(self, max_room_stream_id: int): + async def _notify_pusher_pool(self, max_room_stream_token: RoomStreamToken): try: - await self._pusher_pool.on_new_notifications(max_room_stream_id) + await self._pusher_pool.on_new_notifications(max_room_stream_token.stream) except Exception: logger.exception("Error pusher pool of event") diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e82b9e386f..55af3d41ea 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -29,7 +29,7 @@ from synapse.replication.tcp.streams.events import ( EventsStreamEventRow, EventsStreamRow, ) -from synapse.types import UserID +from synapse.types import PersistedEventPosition, RoomStreamToken, UserID from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -151,8 +151,14 @@ class ReplicationDataHandler: extra_users = () # type: Tuple[UserID, ...] if event.type == EventTypes.Member: extra_users = (UserID.from_string(event.state_key),) - max_token = self.store.get_room_max_stream_ordering() - self.notifier.on_new_room_event(event, token, max_token, extra_users) + + max_token = RoomStreamToken( + None, self.store.get_room_max_stream_ordering() + ) + event_pos = PersistedEventPosition(instance_name, token) + self.notifier.on_new_room_event( + event, event_pos, max_token, extra_users + ) # Notify any waiting deferreds. The list is ordered by position so we # just iterate through the list until we reach a position that is diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 4fa8767b01..86ffe2479e 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Set @@ -37,7 +36,7 @@ from synapse.storage.roommember import ( ProfileInfo, RoomsForUser, ) -from synapse.types import Collection, get_domain_from_id +from synapse.types import Collection, PersistedEventPosition, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList @@ -387,7 +386,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # for rooms the server is participating in. if self._current_state_events_membership_up_to_date: sql = """ - SELECT room_id, e.stream_ordering + SELECT room_id, e.instance_name, e.stream_ordering FROM current_state_events AS c INNER JOIN events AS e USING (room_id, event_id) WHERE @@ -397,7 +396,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): """ else: sql = """ - SELECT room_id, e.stream_ordering + SELECT room_id, e.instance_name, e.stream_ordering FROM current_state_events AS c INNER JOIN room_memberships AS m USING (room_id, event_id) INNER JOIN events AS e USING (room_id, event_id) @@ -408,7 +407,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): """ txn.execute(sql, (user_id, Membership.JOIN)) - return frozenset(GetRoomsForUserWithStreamOrdering(*row) for row in txn) + return frozenset( + GetRoomsForUserWithStreamOrdering( + room_id, PersistedEventPosition(instance, stream_id) + ) + for room_id, instance, stream_id in txn + ) async def get_users_server_still_shares_room_with( self, user_ids: Collection[str] diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index d89f6ed128..603cd7d825 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -31,7 +31,7 @@ from synapse.logging.context import PreserveLoggingContext, make_deferred_yielda from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState -from synapse.types import Collection, StateMap +from synapse.types import Collection, PersistedEventPosition, RoomStreamToken, StateMap from synapse.util.async_helpers import ObservableDeferred from synapse.util.metrics import Measure @@ -190,6 +190,7 @@ class EventsPersistenceStorage: self.persist_events_store = stores.persist_events self._clock = hs.get_clock() + self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() @@ -198,7 +199,7 @@ class EventsPersistenceStorage: self, events_and_contexts: List[Tuple[EventBase, EventContext]], backfilled: bool = False, - ) -> int: + ) -> RoomStreamToken: """ Write events to the database Args: @@ -228,11 +229,11 @@ class EventsPersistenceStorage: defer.gatherResults(deferreds, consumeErrors=True) ) - return self.main_store.get_current_events_token() + return RoomStreamToken(None, self.main_store.get_current_events_token()) async def persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False - ) -> Tuple[int, int]: + ) -> Tuple[PersistedEventPosition, RoomStreamToken]: """ Returns: The stream ordering of `event`, and the stream ordering of the @@ -247,7 +248,10 @@ class EventsPersistenceStorage: await make_deferred_yieldable(deferred) max_persisted_id = self.main_store.get_current_events_token() - return (event.internal_metadata.stream_ordering, max_persisted_id) + event_stream_id = event.internal_metadata.stream_ordering + + pos = PersistedEventPosition(self._instance_name, event_stream_id) + return pos, RoomStreamToken(None, max_persisted_id) def _maybe_start_persisting(self, room_id: str): async def persisting_queue(item): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 8c4a83a840..f152f63321 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -25,7 +25,7 @@ RoomsForUser = namedtuple( ) GetRoomsForUserWithStreamOrdering = namedtuple( - "_GetRoomsForUserWithStreamOrdering", ("room_id", "stream_ordering") + "_GetRoomsForUserWithStreamOrdering", ("room_id", "event_pos") ) diff --git a/synapse/types.py b/synapse/types.py index a6fc7df22c..ec39f9e1e8 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -495,6 +495,21 @@ class StreamToken: StreamToken.START = StreamToken.from_string("s0_0") +@attr.s(slots=True, frozen=True) +class PersistedEventPosition: + """Position of a newly persisted event with instance that persisted it. + + This can be used to test whether the event is persisted before or after a + RoomStreamToken. + """ + + instance_name = attr.ib(type=str) + stream = attr.ib(type=int) + + def persisted_after(self, token: RoomStreamToken) -> bool: + return token.stream < self.stream + + class ThirdPartyInstanceID( namedtuple("ThirdPartyInstanceID", ("appservice_id", "network_id")) ): diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index bc578411d6..c0ee1cfbd6 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -20,6 +20,7 @@ from synapse.events import FrozenEvent, _EventInternalMetadata, make_event_from_ from synapse.handlers.room import RoomEventSource from synapse.replication.slave.storage.events import SlavedEventStore from synapse.storage.roommember import RoomsForUser +from synapse.types import PersistedEventPosition from tests.server import FakeTransport @@ -204,10 +205,14 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join" ) self.replicate() + + expected_pos = PersistedEventPosition( + "master", j2.internal_metadata.stream_ordering + ) self.check( "get_rooms_for_user_with_stream_ordering", (USER_ID_2,), - {(ROOM_ID, j2.internal_metadata.stream_ordering)}, + {(ROOM_ID, expected_pos)}, ) def test_get_rooms_for_user_with_stream_ordering_with_multi_event_persist(self): @@ -293,9 +298,10 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): # the membership change is only any use to us if the room is in the # joined_rooms list. if membership_changes: - self.assertEqual( - joined_rooms, {(ROOM_ID, j2.internal_metadata.stream_ordering)} + expected_pos = PersistedEventPosition( + "master", j2.internal_metadata.stream_ordering ) + self.assertEqual(joined_rooms, {(ROOM_ID, expected_pos)}) event_id = 0 -- cgit 1.5.1 From 2649d545a551dd126d73d34a6e3172916ea483e0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Sep 2020 15:57:36 +0100 Subject: Mypy fixes for `synapse.handlers.federation` (#8422) For some reason, an apparently unrelated PR upset mypy about this module. Here are a number of little fixes. --- changelog.d/8422.misc | 1 + synapse/federation/federation_client.py | 4 +++- synapse/handlers/federation.py | 13 +++++++++---- synapse/storage/databases/state/store.py | 4 ++-- synapse/storage/persist_events.py | 2 +- synapse/storage/state.py | 6 +++--- 6 files changed, 19 insertions(+), 11 deletions(-) create mode 100644 changelog.d/8422.misc (limited to 'synapse/storage/persist_events.py') diff --git a/changelog.d/8422.misc b/changelog.d/8422.misc new file mode 100644 index 0000000000..03fba120c6 --- /dev/null +++ b/changelog.d/8422.misc @@ -0,0 +1 @@ +Typing fixes for `synapse.handlers.federation`. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 688d43fffb..302b2f69bc 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -24,10 +24,12 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, Sequence, Tuple, TypeVar, + Union, ) from prometheus_client import Counter @@ -501,7 +503,7 @@ class FederationClient(FederationBase): user_id: str, membership: str, content: dict, - params: Dict[str, str], + params: Optional[Mapping[str, Union[str, Iterable[str]]]], ) -> Tuple[str, EventBase, RoomVersion]: """ Creates an m.room.member event, with context, without participating in the room. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5bcfb231b2..0073e7c996 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -155,8 +155,9 @@ class FederationHandler(BaseHandler): self._device_list_updater = hs.get_device_handler().device_list_updater self._maybe_store_room_on_invite = self.store.maybe_store_room_on_invite - # When joining a room we need to queue any events for that room up - self.room_queues = {} + # When joining a room we need to queue any events for that room up. + # For each room, a list of (pdu, origin) tuples. + self.room_queues = {} # type: Dict[str, List[Tuple[EventBase, str]]] self._room_pdu_linearizer = Linearizer("fed_room_pdu") self.third_party_event_rules = hs.get_third_party_event_rules() @@ -814,6 +815,9 @@ class FederationHandler(BaseHandler): dest, room_id, limit=limit, extremities=extremities ) + if not events: + return [] + # ideally we'd sanity check the events here for excess prev_events etc, # but it's hard to reject events at this point without completely # breaking backfill in the same way that it is currently broken by @@ -2164,10 +2168,10 @@ class FederationHandler(BaseHandler): # given state at the event. This should correctly handle cases # like bans, especially with state res v2. - state_sets = await self.state_store.get_state_groups( + state_sets_d = await self.state_store.get_state_groups( event.room_id, extrem_ids ) - state_sets = list(state_sets.values()) + state_sets = list(state_sets_d.values()) # type: List[Iterable[EventBase]] state_sets.append(state) current_states = await self.state_handler.resolve_events( room_version, state_sets, event @@ -2958,6 +2962,7 @@ class FederationHandler(BaseHandler): ) return result["max_stream_id"] else: + assert self.storage.persistence max_stream_token = await self.storage.persistence.persist_events( event_and_contexts, backfilled=backfilled ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 989f0cbc9d..0e31cc811a 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -24,7 +24,7 @@ from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStor from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import StateMap +from synapse.types import MutableStateMap, StateMap from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache @@ -208,7 +208,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): async def _get_state_for_groups( self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all() - ) -> Dict[int, StateMap[str]]: + ) -> Dict[int, MutableStateMap[str]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 603cd7d825..ded6cf9655 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -197,7 +197,7 @@ class EventsPersistenceStorage: async def persist_events( self, - events_and_contexts: List[Tuple[EventBase, EventContext]], + events_and_contexts: Iterable[Tuple[EventBase, EventContext]], backfilled: bool = False, ) -> RoomStreamToken: """ diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 8f68d968f0..08a69f2f96 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -20,7 +20,7 @@ import attr from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.types import StateMap +from synapse.types import MutableStateMap, StateMap logger = logging.getLogger(__name__) @@ -349,7 +349,7 @@ class StateGroupStorage: async def get_state_groups_ids( self, _room_id: str, event_ids: Iterable[str] - ) -> Dict[int, StateMap[str]]: + ) -> Dict[int, MutableStateMap[str]]: """Get the event IDs of all the state for the state groups for the given events Args: @@ -532,7 +532,7 @@ class StateGroupStorage: def _get_state_for_groups( self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all() - ) -> Awaitable[Dict[int, StateMap[str]]]: + ) -> Awaitable[Dict[int, MutableStateMap[str]]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key -- cgit 1.5.1 From ea70f1c362dc4bd6c0f8a67e16ed0971fe095e5b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Sep 2020 21:48:33 +0100 Subject: Various clean ups to room stream tokens. (#8423) --- changelog.d/8423.misc | 1 + synapse/events/__init__.py | 6 ++-- synapse/handlers/admin.py | 2 +- synapse/handlers/device.py | 4 +-- synapse/handlers/initial_sync.py | 3 +- synapse/handlers/pagination.py | 5 ++- synapse/handlers/room.py | 4 +-- synapse/handlers/sync.py | 20 ++++++++---- synapse/notifier.py | 4 +-- synapse/replication/tcp/client.py | 6 ++-- synapse/rest/admin/__init__.py | 3 +- synapse/storage/databases/main/stream.py | 38 +++++++++++++---------- synapse/storage/persist_events.py | 5 ++- synapse/types.py | 53 ++++++++++++++++++++------------ tests/rest/client/v1/test_rooms.py | 8 ++--- tests/storage/test_purge.py | 10 +++--- 16 files changed, 96 insertions(+), 76 deletions(-) create mode 100644 changelog.d/8423.misc (limited to 'synapse/storage/persist_events.py') diff --git a/changelog.d/8423.misc b/changelog.d/8423.misc new file mode 100644 index 0000000000..7260e3fa41 --- /dev/null +++ b/changelog.d/8423.misc @@ -0,0 +1 @@ +Various refactors to simplify stream token handling. diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index bf800a3852..dc49df0812 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -23,7 +23,7 @@ from typing import Dict, Optional, Tuple, Type from unpaddedbase64 import encode_base64 from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions -from synapse.types import JsonDict +from synapse.types import JsonDict, RoomStreamToken from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -118,8 +118,8 @@ class _EventInternalMetadata: # XXX: These are set by StreamWorkerStore._set_before_and_after. # I'm pretty sure that these are never persisted to the database, so shouldn't # be here - before = DictProperty("before") # type: str - after = DictProperty("after") # type: str + before = DictProperty("before") # type: RoomStreamToken + after = DictProperty("after") # type: RoomStreamToken order = DictProperty("order") # type: Tuple[int, int] def get_dict(self) -> JsonDict: diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index dd981c597e..1ce2091b46 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -153,7 +153,7 @@ class AdminHandler(BaseHandler): if not events: break - from_key = RoomStreamToken.parse(events[-1].internal_metadata.after) + from_key = events[-1].internal_metadata.after events = await filter_events_for_client(self.storage, user_id, events) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 4149520d6c..b9d9098104 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -29,7 +29,6 @@ from synapse.api.errors import ( from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import ( - RoomStreamToken, StreamToken, get_domain_from_id, get_verify_key_from_cross_signing_key, @@ -113,8 +112,7 @@ class DeviceWorkerHandler(BaseHandler): set_tag("user_id", user_id) set_tag("from_token", from_token) - now_room_id = self.store.get_room_max_stream_ordering() - now_room_key = RoomStreamToken(None, now_room_id) + now_room_key = self.store.get_room_max_token() room_ids = await self.store.get_rooms_for_user(user_id) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 8cd7eb22a3..43f15435de 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -325,7 +325,8 @@ class InitialSyncHandler(BaseHandler): if limit is None: limit = 10 - stream_token = await self.store.get_stream_token_for_event(member_event_id) + leave_position = await self.store.get_position_for_event(member_event_id) + stream_token = leave_position.to_room_stream_token() messages, token = await self.store.get_recent_events_for_room( room_id, limit=limit, end_token=stream_token diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index a0b3bdb5e0..d6779a4b44 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -25,7 +25,7 @@ from synapse.logging.context import run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import Requester, RoomStreamToken +from synapse.types import Requester from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client @@ -373,10 +373,9 @@ class PaginationHandler: # case "JOIN" would have been returned. assert member_event_id - leave_token_str = await self.store.get_topological_token_for_event( + leave_token = await self.store.get_topological_token_for_event( member_event_id ) - leave_token = RoomStreamToken.parse(leave_token_str) assert leave_token.topological is not None if leave_token.topological < curr_topo: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 11bf146bed..836b3f381a 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1134,14 +1134,14 @@ class RoomEventSource: events[:] = events[:limit] if events: - end_key = RoomStreamToken.parse(events[-1].internal_metadata.after) + end_key = events[-1].internal_metadata.after else: end_key = to_key return (events, end_key) def get_current_key(self) -> RoomStreamToken: - return RoomStreamToken(None, self.store.get_room_max_stream_ordering()) + return self.store.get_room_max_token() def get_current_key_for_room(self, room_id: str) -> Awaitable[str]: return self.store.get_room_events_max_id(room_id) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e948efef2e..bfe2583002 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -519,7 +519,7 @@ class SyncHandler: if len(recents) > timeline_limit: limited = True recents = recents[-timeline_limit:] - room_key = RoomStreamToken.parse(recents[0].internal_metadata.before) + room_key = recents[0].internal_metadata.before prev_batch_token = now_token.copy_and_replace("room_key", room_key) @@ -1595,16 +1595,24 @@ class SyncHandler: if leave_events: leave_event = leave_events[-1] - leave_stream_token = await self.store.get_stream_token_for_event( + leave_position = await self.store.get_position_for_event( leave_event.event_id ) - leave_token = since_token.copy_and_replace( - "room_key", leave_stream_token - ) - if since_token and since_token.is_after(leave_token): + # If the leave event happened before the since token then we + # bail. + if since_token and not leave_position.persisted_after( + since_token.room_key + ): continue + # We can safely convert the position of the leave event into a + # stream token as it'll only be used in the context of this + # room. (c.f. the docstring of `to_room_stream_token`). + leave_token = since_token.copy_and_replace( + "room_key", leave_position.to_room_stream_token() + ) + # If this is an out of band message, like a remote invite # rejection, we include it in the recents batch. Otherwise, we # let _load_filtered_recents handle fetching the correct diff --git a/synapse/notifier.py b/synapse/notifier.py index 441b3d15e2..59415f6f88 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -163,7 +163,7 @@ class _NotifierUserStream: """ # Immediately wake up stream if something has already since happened # since their last token. - if self.last_notified_token.is_after(token): + if self.last_notified_token != token: return _NotificationListener(defer.succeed(self.current_token)) else: return _NotificationListener(self.notify_deferred.observe()) @@ -470,7 +470,7 @@ class Notifier: async def check_for_updates( before_token: StreamToken, after_token: StreamToken ) -> EventStreamResult: - if not after_token.is_after(before_token): + if after_token == before_token: return EventStreamResult([], (from_token, from_token)) events = [] # type: List[EventBase] diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 55af3d41ea..e165429cad 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -29,7 +29,7 @@ from synapse.replication.tcp.streams.events import ( EventsStreamEventRow, EventsStreamRow, ) -from synapse.types import PersistedEventPosition, RoomStreamToken, UserID +from synapse.types import PersistedEventPosition, UserID from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -152,9 +152,7 @@ class ReplicationDataHandler: if event.type == EventTypes.Member: extra_users = (UserID.from_string(event.state_key),) - max_token = RoomStreamToken( - None, self.store.get_room_max_stream_ordering() - ) + max_token = self.store.get_room_max_token() event_pos = PersistedEventPosition(instance_name, token) self.notifier.on_new_room_event( event, event_pos, max_token, extra_users diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 5c5f00b213..ba53f66f02 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -109,7 +109,8 @@ class PurgeHistoryRestServlet(RestServlet): if event.room_id != room_id: raise SynapseError(400, "Event is for wrong room.") - token = await self.store.get_topological_token_for_event(event_id) + room_token = await self.store.get_topological_token_for_event(event_id) + token = str(room_token) logger.info("[purge] purging up to token %s (event_id %s)", token, event_id) elif "purge_up_to_ts" in body: diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 92e96468b4..37249f1e3f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -35,7 +35,6 @@ what sort order was used: - topological tokems: "t%d-%d", where the integers map to the topological and stream ordering columns respectively. """ - import abc import logging from collections import namedtuple @@ -54,7 +53,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine -from synapse.types import Collection, RoomStreamToken +from synapse.types import Collection, PersistedEventPosition, RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache if TYPE_CHECKING: @@ -305,6 +304,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): def get_room_min_stream_ordering(self) -> int: raise NotImplementedError() + def get_room_max_token(self) -> RoomStreamToken: + return RoomStreamToken(None, self.get_room_max_stream_ordering()) + async def get_room_events_stream_for_rooms( self, room_ids: Collection[str], @@ -611,26 +613,28 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): allow_none=allow_none, ) - async def get_stream_token_for_event(self, event_id: str) -> RoomStreamToken: - """The stream token for an event - Args: - event_id: The id of the event to look up a stream token for. - Raises: - StoreError if the event wasn't in the database. - Returns: - A stream token. + async def get_position_for_event(self, event_id: str) -> PersistedEventPosition: + """Get the persisted position for an event """ - stream_id = await self.get_stream_id_for_event(event_id) - return RoomStreamToken(None, stream_id) + row = await self.db_pool.simple_select_one( + table="events", + keyvalues={"event_id": event_id}, + retcols=("stream_ordering", "instance_name"), + desc="get_position_for_event", + ) + + return PersistedEventPosition( + row["instance_name"] or "master", row["stream_ordering"] + ) - async def get_topological_token_for_event(self, event_id: str) -> str: + async def get_topological_token_for_event(self, event_id: str) -> RoomStreamToken: """The stream token for an event Args: event_id: The id of the event to look up a stream token for. Raises: StoreError if the event wasn't in the database. Returns: - A "t%d-%d" topological token. + A `RoomStreamToken` topological token. """ row = await self.db_pool.simple_select_one( table="events", @@ -638,7 +642,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): retcols=("stream_ordering", "topological_ordering"), desc="get_topological_token_for_event", ) - return "t%d-%d" % (row["topological_ordering"], row["stream_ordering"]) + return RoomStreamToken(row["topological_ordering"], row["stream_ordering"]) async def get_current_topological_token(self, room_id: str, stream_key: int) -> int: """Gets the topological token in a room after or at the given stream @@ -687,8 +691,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): else: topo = None internal = event.internal_metadata - internal.before = str(RoomStreamToken(topo, stream - 1)) - internal.after = str(RoomStreamToken(topo, stream)) + internal.before = RoomStreamToken(topo, stream - 1) + internal.after = RoomStreamToken(topo, stream) internal.order = (int(topo) if topo else 0, int(stream)) async def get_events_around( diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index ded6cf9655..72939f3984 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -229,7 +229,7 @@ class EventsPersistenceStorage: defer.gatherResults(deferreds, consumeErrors=True) ) - return RoomStreamToken(None, self.main_store.get_current_events_token()) + return self.main_store.get_room_max_token() async def persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False @@ -247,11 +247,10 @@ class EventsPersistenceStorage: await make_deferred_yieldable(deferred) - max_persisted_id = self.main_store.get_current_events_token() event_stream_id = event.internal_metadata.stream_ordering pos = PersistedEventPosition(self._instance_name, event_stream_id) - return pos, RoomStreamToken(None, max_persisted_id) + return pos, self.main_store.get_room_max_token() def _maybe_start_persisting(self, room_id: str): async def persisting_queue(item): diff --git a/synapse/types.py b/synapse/types.py index ec39f9e1e8..02bcc197ec 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -413,6 +413,18 @@ class RoomStreamToken: pass raise SynapseError(400, "Invalid token %r" % (string,)) + def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": + """Return a new token such that if an event is after both this token and + the other token, then its after the returned token too. + """ + + if self.topological or other.topological: + raise Exception("Can't advance topological tokens") + + max_stream = max(self.stream, other.stream) + + return RoomStreamToken(None, max_stream) + def as_tuple(self) -> Tuple[Optional[int], int]: return (self.topological, self.stream) @@ -458,31 +470,20 @@ class StreamToken: def room_stream_id(self): return self.room_key.stream - def is_after(self, other): - """Does this token contain events that the other doesn't?""" - return ( - (other.room_stream_id < self.room_stream_id) - or (int(other.presence_key) < int(self.presence_key)) - or (int(other.typing_key) < int(self.typing_key)) - or (int(other.receipt_key) < int(self.receipt_key)) - or (int(other.account_data_key) < int(self.account_data_key)) - or (int(other.push_rules_key) < int(self.push_rules_key)) - or (int(other.to_device_key) < int(self.to_device_key)) - or (int(other.device_list_key) < int(self.device_list_key)) - or (int(other.groups_key) < int(self.groups_key)) - ) - def copy_and_advance(self, key, new_value) -> "StreamToken": """Advance the given key in the token to a new value if and only if the new value is after the old value. """ - new_token = self.copy_and_replace(key, new_value) if key == "room_key": - new_id = new_token.room_stream_id - old_id = self.room_stream_id - else: - new_id = int(getattr(new_token, key)) - old_id = int(getattr(self, key)) + new_token = self.copy_and_replace( + "room_key", self.room_key.copy_and_advance(new_value) + ) + return new_token + + new_token = self.copy_and_replace(key, new_value) + new_id = int(getattr(new_token, key)) + old_id = int(getattr(self, key)) + if old_id < new_id: return new_token else: @@ -509,6 +510,18 @@ class PersistedEventPosition: def persisted_after(self, token: RoomStreamToken) -> bool: return token.stream < self.stream + def to_room_stream_token(self) -> RoomStreamToken: + """Converts the position to a room stream token such that events + persisted in the same room after this position will be after the + returned `RoomStreamToken`. + + Note: no guarentees are made about ordering w.r.t. events in other + rooms. + """ + # Doing the naive thing satisfies the desired properties described in + # the docstring. + return RoomStreamToken(None, self.stream) + class ThirdPartyInstanceID( namedtuple("ThirdPartyInstanceID", ("appservice_id", "network_id")) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 0a567b032f..a3287011e9 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -902,15 +902,15 @@ class RoomMessageListTestCase(RoomBase): # Send a first message in the room, which will be removed by the purge. first_event_id = self.helper.send(self.room_id, "message 1")["event_id"] - first_token = self.get_success( - store.get_topological_token_for_event(first_event_id) + first_token = str( + self.get_success(store.get_topological_token_for_event(first_event_id)) ) # Send a second message in the room, which won't be removed, and which we'll # use as the marker to purge events before. second_event_id = self.helper.send(self.room_id, "message 2")["event_id"] - second_token = self.get_success( - store.get_topological_token_for_event(second_event_id) + second_token = str( + self.get_success(store.get_topological_token_for_event(second_event_id)) ) # Send a third event in the room to ensure we don't fall under any edge case diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py index 918387733b..723cd28933 100644 --- a/tests/storage/test_purge.py +++ b/tests/storage/test_purge.py @@ -47,8 +47,8 @@ class PurgeTests(HomeserverTestCase): storage = self.hs.get_storage() # Get the topological token - event = self.get_success( - store.get_topological_token_for_event(last["event_id"]) + event = str( + self.get_success(store.get_topological_token_for_event(last["event_id"])) ) # Purge everything before this topological token @@ -74,12 +74,10 @@ class PurgeTests(HomeserverTestCase): storage = self.hs.get_datastore() # Set the topological token higher than it should be - event = self.get_success( + token = self.get_success( storage.get_topological_token_for_event(last["event_id"]) ) - event = "t{}-{}".format( - *list(map(lambda x: x + 1, map(int, event[1:].split("-")))) - ) + event = "t{}-{}".format(token.topological + 1, token.stream + 1) # Purge everything before this topological token purge = defer.ensureDeferred(storage.purge_history(self.room_id, event, True)) -- cgit 1.5.1