From 37be52ac34f3ec2e409f2961e428c1f42d25d729 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 31 Jul 2018 16:29:09 -0600 Subject: limt -> limit --- synapse/federation/transport/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 4529d454af..625b4def9b 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -106,7 +106,7 @@ class TransportLayerClient(object): dest (str) room_id (str) event_tuples (list) - limt (int) + limit (int) Returns: Deferred: Results in a dict received from the remote homeserver. -- cgit 1.5.1 From 119451dcd192ff5397a3f9630f14556737e82677 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Aug 2018 15:22:40 +0100 Subject: Refactor state module We split out the actual state resolution algorithm to prepare for having multiple versions. --- synapse/state.py | 866 ---------------------------------------------- synapse/state/__init__.py | 574 ++++++++++++++++++++++++++++++ synapse/state/v1.py | 321 +++++++++++++++++ 3 files changed, 895 insertions(+), 866 deletions(-) delete mode 100644 synapse/state.py create mode 100644 synapse/state/__init__.py create mode 100644 synapse/state/v1.py diff --git a/synapse/state.py b/synapse/state.py deleted file mode 100644 index e1092b97a9..0000000000 --- a/synapse/state.py +++ /dev/null @@ -1,866 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import hashlib -import logging -from collections import namedtuple - -from six import iteritems, iterkeys, itervalues - -from frozendict import frozendict - -from twisted.internet import defer - -from synapse import event_auth -from synapse.api.constants import EventTypes -from synapse.api.errors import AuthError -from synapse.events.snapshot import EventContext -from synapse.util.async import Linearizer -from synapse.util.caches import CACHE_SIZE_FACTOR -from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.logutils import log_function -from synapse.util.metrics import Measure - -logger = logging.getLogger(__name__) - - -KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key")) - - -SIZE_OF_CACHE = int(100000 * CACHE_SIZE_FACTOR) -EVICTION_TIMEOUT_SECONDS = 60 * 60 - - -_NEXT_STATE_ID = 1 - -POWER_KEY = (EventTypes.PowerLevels, "") - - -def _gen_state_id(): - global _NEXT_STATE_ID - s = "X%d" % (_NEXT_STATE_ID,) - _NEXT_STATE_ID += 1 - return s - - -class _StateCacheEntry(object): - __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] - - def __init__(self, state, state_group, prev_group=None, delta_ids=None): - # dict[(str, str), str] map from (type, state_key) to event_id - self.state = frozendict(state) - - # the ID of a state group if one and only one is involved. - # otherwise, None otherwise? 
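# Illustrative aside (ids invented, not part of the diff): `state_id` below
# is either the persisted integer state group or a string produced by
# _gen_state_id() above, so a cache-only id can never be mistaken for a
# real state group and persisted by accident.
_NEXT_ID = 1

def gen_cache_only_id():
    # Same shape as _gen_state_id: "X1", "X2", ... -- string-typed on purpose.
    global _NEXT_ID
    s = "X%d" % (_NEXT_ID,)
    _NEXT_ID += 1
    return s

assert gen_cache_only_id() == "X1"
assert gen_cache_only_id() == "X2"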
- self.state_group = state_group - - self.prev_group = prev_group - self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None - - # The `state_id` is a unique ID we generate that can be used as ID for - # this collection of state. Usually this would be the same as the - # state group, but on worker instances we can't generate a new state - # group each time we resolve state, so we generate a separate one that - # isn't persisted and is used solely for caches. - # `state_id` is either a state_group (and so an int) or a string. This - # ensures we don't accidentally persist a state_id as a stateg_group - if state_group: - self.state_id = state_group - else: - self.state_id = _gen_state_id() - - def __len__(self): - return len(self.state) - - -class StateHandler(object): - """Fetches bits of state from the stores, and does state resolution - where necessary - """ - - def __init__(self, hs): - self.clock = hs.get_clock() - self.store = hs.get_datastore() - self.hs = hs - self._state_resolution_handler = hs.get_state_resolution_handler() - - def start_caching(self): - # TODO: remove this shim - self._state_resolution_handler.start_caching() - - @defer.inlineCallbacks - def get_current_state(self, room_id, event_type=None, state_key="", - latest_event_ids=None): - """ Retrieves the current state for the room. This is done by - calling `get_latest_events_in_room` to get the leading edges of the - event graph and then resolving any of the state conflicts. - - This is equivalent to getting the state of an event that were to send - next before receiving any new events. - - If `event_type` is specified, then the method returns only the one - event (or None) with that `event_type` and `state_key`. - - Returns: - map from (type, state_key) to event - """ - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - - logger.debug("calling resolve_state_groups from get_current_state") - ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) - state = ret.state - - if event_type: - event_id = state.get((event_type, state_key)) - event = None - if event_id: - event = yield self.store.get_event(event_id, allow_none=True) - defer.returnValue(event) - return - - state_map = yield self.store.get_events(list(state.values()), - get_prev_content=False) - state = { - key: state_map[e_id] for key, e_id in iteritems(state) if e_id in state_map - } - - defer.returnValue(state) - - @defer.inlineCallbacks - def get_current_state_ids(self, room_id, latest_event_ids=None): - """Get the current state, or the state at a set of events, for a room - - Args: - room_id (str): - - latest_event_ids (iterable[str]|None): if given, the forward - extremities to resolve. 
If None, we look them up from the - database (via a cache) - - Returns: - Deferred[dict[(str, str), str)]]: the state dict, mapping from - (event_type, state_key) -> event_id - """ - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - - logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) - state = ret.state - - defer.returnValue(state) - - @defer.inlineCallbacks - def get_current_user_in_room(self, room_id, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_current_user_in_room") - entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) - joined_users = yield self.store.get_joined_users_from_state(room_id, entry) - defer.returnValue(joined_users) - - @defer.inlineCallbacks - def get_current_hosts_in_room(self, room_id, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) - joined_hosts = yield self.store.get_joined_hosts(room_id, entry) - defer.returnValue(joined_hosts) - - @defer.inlineCallbacks - def compute_event_context(self, event, old_state=None): - """Build an EventContext structure for the event. - - This works out what the current state should be for the event, and - generates a new state group if necessary. - - Args: - event (synapse.events.EventBase): - old_state (dict|None): The state at the event if it can't be - calculated from existing events. This is normally only specified - when receiving an event from federation where we don't have the - prev events for, e.g. when backfilling. - Returns: - synapse.events.snapshot.EventContext: - """ - - if event.internal_metadata.is_outlier(): - # If this is an outlier, then we know it shouldn't have any current - # state. Certainly store.get_current_state won't return any, and - # persisting the event won't store the state group. - if old_state: - prev_state_ids = { - (s.type, s.state_key): s.event_id for s in old_state - } - if event.is_state(): - current_state_ids = dict(prev_state_ids) - key = (event.type, event.state_key) - current_state_ids[key] = event.event_id - else: - current_state_ids = prev_state_ids - else: - current_state_ids = {} - prev_state_ids = {} - - # We don't store state for outliers, so we don't generate a state - # group for it. - context = EventContext.with_state( - state_group=None, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - ) - - defer.returnValue(context) - - if old_state: - # We already have the state, so we don't need to calculate it. - # Let's just correctly fill out the context and create a - # new state group for it. 
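# Self-contained illustration of the bookkeeping performed below, using
# stub events and invented ids: the state *before* the event comes from
# `old_state`, and a state event additionally overwrites its own key in
# the *current* state.
class _StubEvent(object):
    def __init__(self, type, state_key, event_id):
        self.type, self.state_key, self.event_id = type, state_key, event_id

old_state = [_StubEvent("m.room.name", "", "$old_name")]
ev = _StubEvent("m.room.name", "", "$new_name")

prev_state_ids = {(e.type, e.state_key): e.event_id for e in old_state}
current_state_ids = dict(prev_state_ids)
current_state_ids[(ev.type, ev.state_key)] = ev.event_id

assert prev_state_ids[("m.room.name", "")] == "$old_name"
assert current_state_ids[("m.room.name", "")] == "$new_name"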
- - prev_state_ids = { - (s.type, s.state_key): s.event_id for s in old_state - } - - if event.is_state(): - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] - if replaces != event.event_id: # Paranoia check - event.unsigned["replaces_state"] = replaces - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id - else: - current_state_ids = prev_state_ids - - state_group = yield self.store.store_state_group( - event.event_id, - event.room_id, - prev_group=None, - delta_ids=None, - current_state_ids=current_state_ids, - ) - - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - ) - - defer.returnValue(context) - - logger.debug("calling resolve_state_groups from compute_event_context") - entry = yield self.resolve_state_groups_for_events( - event.room_id, [e for e, _ in event.prev_events], - ) - - prev_state_ids = entry.state - prev_group = None - delta_ids = None - - if event.is_state(): - # If this is a state event then we need to create a new state - # group for the state after this event. - - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] - event.unsigned["replaces_state"] = replaces - - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id - - if entry.state_group: - # If the state at the event has a state group assigned then - # we can use that as the prev group - prev_group = entry.state_group - delta_ids = { - key: event.event_id - } - elif entry.prev_group: - # If the state at the event only has a prev group, then we can - # use that as a prev group too. - prev_group = entry.prev_group - delta_ids = dict(entry.delta_ids) - delta_ids[key] = event.event_id - - state_group = yield self.store.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=delta_ids, - current_state_ids=current_state_ids, - ) - else: - current_state_ids = prev_state_ids - prev_group = entry.prev_group - delta_ids = entry.delta_ids - - if entry.state_group is None: - entry.state_group = yield self.store.store_state_group( - event.event_id, - event.room_id, - prev_group=entry.prev_group, - delta_ids=entry.delta_ids, - current_state_ids=current_state_ids, - ) - entry.state_id = entry.state_group - - state_group = entry.state_group - - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - prev_group=prev_group, - delta_ids=delta_ids, - ) - - defer.returnValue(context) - - @defer.inlineCallbacks - def resolve_state_groups_for_events(self, room_id, event_ids): - """ Given a list of event_ids this method fetches the state at each - event, resolves conflicts between them and returns them. 
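# Sketch of the prev_group/delta_ids representation used above, with
# invented ids: a new state group can be stored as a small delta against
# the previous group, and the full map is recovered by applying the delta.
prev_group_state = {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:hs"): "$join_alice",
}
delta_ids = {("m.room.member", "@bob:hs"): "$join_bob"}

full_state = dict(prev_group_state)
full_state.update(delta_ids)
assert len(full_state) == 3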
- - Args: - room_id (str): - event_ids (list[str]): - - Returns: - Deferred[_StateCacheEntry]: resolved state - """ - logger.debug("resolve_state_groups event_ids %s", event_ids) - - # map from state group id to the state in that state group (where - # 'state' is a map from state key to event id) - # dict[int, dict[(str, str), str]] - state_groups_ids = yield self.store.get_state_groups_ids( - room_id, event_ids - ) - - if len(state_groups_ids) == 1: - name, state_list = list(state_groups_ids.items()).pop() - - prev_group, delta_ids = yield self.store.get_state_group_delta(name) - - defer.returnValue(_StateCacheEntry( - state=state_list, - state_group=name, - prev_group=prev_group, - delta_ids=delta_ids, - )) - - result = yield self._state_resolution_handler.resolve_state_groups( - room_id, state_groups_ids, None, self._state_map_factory, - ) - defer.returnValue(result) - - def _state_map_factory(self, ev_ids): - return self.store.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - - def resolve_events(self, state_sets, event): - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [{ - (ev.type, ev.state_key): ev.event_id - for ev in st - } for st in state_sets] - - state_map = { - ev.event_id: ev - for st in state_sets - for ev in st - } - - with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events_with_state_map(state_set_ids, state_map) - - new_state = { - key: state_map[ev_id] for key, ev_id in iteritems(new_state) - } - - return new_state - - -class StateResolutionHandler(object): - """Responsible for doing state conflict resolution. - - Note that the storage layer depends on this handler, so all functions must - be storage-independent. - """ - def __init__(self, hs): - self.clock = hs.get_clock() - - # dict of set of event_ids -> _StateCacheEntry. - self._state_cache = None - self.resolve_linearizer = Linearizer(name="state_resolve_lock") - - def start_caching(self): - logger.debug("start_caching") - - self._state_cache = ExpiringCache( - cache_name="state_cache", - clock=self.clock, - max_len=SIZE_OF_CACHE, - expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, - iterable=True, - reset_expiry_on_get=True, - ) - - self._state_cache.start() - - @defer.inlineCallbacks - @log_function - def resolve_state_groups( - self, room_id, state_groups_ids, event_map, state_map_factory, - ): - """Resolves conflicts between a set of state groups - - Always generates a new state group (unless we hit the cache), so should - not be called for a single state group - - Args: - room_id (str): room we are resolving for (used for logging) - state_groups_ids (dict[int, dict[(str, str), str]]): - map from state group id to the state in that state group - (where 'state' is a map from state key to event id) - - event_map(dict[str,FrozenEvent]|None): - a dict from event_id to event, for any events that we happen to - have in flight (eg, those currently being persisted). This will be - used as a starting point fof finding the state we need; any missing - events will be requested via state_map_factory. - - If None, all events will be fetched via state_map_factory. 
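# A minimal state_map_factory satisfying the contract described above
# (assumed shape, backed by a plain dict rather than the real datastore):
# it takes the needed event_ids and returns a Deferred of event_id -> event.
from twisted.internet import defer

EVENTS = {}  # hypothetical event_id -> FrozenEvent map

def state_map_factory(ev_ids):
    return defer.succeed({
        e_id: EVENTS[e_id] for e_id in ev_ids if e_id in EVENTS
    })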
- - Returns: - Deferred[_StateCacheEntry]: resolved state - """ - logger.debug( - "resolve_state_groups state_groups %s", - state_groups_ids.keys() - ) - - group_names = frozenset(state_groups_ids.keys()) - - with (yield self.resolve_linearizer.queue(group_names)): - if self._state_cache is not None: - cache = self._state_cache.get(group_names, None) - if cache: - defer.returnValue(cache) - - logger.info( - "Resolving state for %s with %d groups", room_id, len(state_groups_ids) - ) - - # start by assuming we won't have any conflicted state, and build up the new - # state map by iterating through the state groups. If we discover a conflict, - # we give up and instead use `resolve_events_with_factory`. - # - # XXX: is this actually worthwhile, or should we just let - # resolve_events_with_factory do it? - new_state = {} - conflicted_state = False - for st in itervalues(state_groups_ids): - for key, e_id in iteritems(st): - if key in new_state: - conflicted_state = True - break - new_state[key] = e_id - if conflicted_state: - break - - if conflicted_state: - logger.info("Resolving conflicted state for %r", room_id) - with Measure(self.clock, "state._resolve_events"): - new_state = yield resolve_events_with_factory( - list(itervalues(state_groups_ids)), - event_map=event_map, - state_map_factory=state_map_factory, - ) - - # if the new state matches any of the input state groups, we can - # use that state group again. Otherwise we will generate a state_id - # which will be used as a cache key for future resolutions, but - # not get persisted. - - with Measure(self.clock, "state.create_group_ids"): - cache = _make_state_cache_entry(new_state, state_groups_ids) - - if self._state_cache is not None: - self._state_cache[group_names] = cache - - defer.returnValue(cache) - - -def _make_state_cache_entry( - new_state, - state_groups_ids, -): - """Given a resolved state, and a set of input state groups, pick one to base - a new state group on (if any), and return an appropriately-constructed - _StateCacheEntry. - - Args: - new_state (dict[(str, str), str]): resolved state map (mapping from - (type, state_key) to event_id) - - state_groups_ids (dict[int, dict[(str, str), str]]): - map from state group id to the state in that state group - (where 'state' is a map from state key to event id) - - Returns: - _StateCacheEntry - """ - # if the new state matches any of the input state groups, we can - # use that state group again. Otherwise we will generate a state_id - # which will be used as a cache key for future resolutions, but - # not get persisted. - - # first look for exact matches - new_state_event_ids = set(itervalues(new_state)) - for sg, state in iteritems(state_groups_ids): - if len(new_state_event_ids) != len(state): - continue - - old_state_event_ids = set(itervalues(state)) - if new_state_event_ids == old_state_event_ids: - # got an exact match. - return _StateCacheEntry( - state=new_state, - state_group=sg, - ) - - # TODO: We want to create a state group for this set of events, to - # increase cache hits, but we need to make sure that it doesn't - # end up as a prev_group without being added to the database - - # failing that, look for the closest match. 
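# Worked example of the closest-match search below, with invented ids:
# choose the input group that needs the fewest delta entries to reach
# new_state; only that delta is then kept as delta_ids.
new_state = {("m.room.name", ""): "$n2", ("m.room.topic", ""): "$t1"}
state_groups_ids = {
    1: {("m.room.name", ""): "$n1", ("m.room.topic", ""): "$t1"},
    2: {("m.room.name", ""): "$n0"},
}

def delta_size(old_state):
    return sum(1 for k, v in new_state.items() if old_state.get(k) != v)

assert delta_size(state_groups_ids[1]) == 1  # only the name event differs
assert delta_size(state_groups_ids[2]) == 2  # name differs, topic missing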
- prev_group = None - delta_ids = None - - for old_group, old_state in iteritems(state_groups_ids): - n_delta_ids = { - k: v - for k, v in iteritems(new_state) - if old_state.get(k) != v - } - if not delta_ids or len(n_delta_ids) < len(delta_ids): - prev_group = old_group - delta_ids = n_delta_ids - - return _StateCacheEntry( - state=new_state, - state_group=None, - prev_group=prev_group, - delta_ids=delta_ids, - ) - - -def _ordered_events(events): - def key_func(e): - return -int(e.depth), hashlib.sha1(e.event_id.encode('ascii')).hexdigest() - - return sorted(events, key=key_func) - - -def resolve_events_with_state_map(state_sets, state_map): - """ - Args: - state_sets(list): List of dicts of (type, state_key) -> event_id, - which are the different state groups to resolve. - state_map(dict): a dict from event_id to event, for all events in - state_sets. - - Returns - dict[(str, str), str]: - a map from (type, state_key) to event_id. - """ - if len(state_sets) == 1: - return state_sets[0] - - unconflicted_state, conflicted_state = _seperate( - state_sets, - ) - - auth_events = _create_auth_events_from_maps( - unconflicted_state, conflicted_state, state_map - ) - - return _resolve_with_state( - unconflicted_state, conflicted_state, auth_events, state_map - ) - - -def _seperate(state_sets): - """Takes the state_sets and figures out which keys are conflicted and - which aren't. i.e., which have multiple different event_ids associated - with them in different state sets. - - Args: - state_sets(iterable[dict[(str, str), str]]): - List of dicts of (type, state_key) -> event_id, which are the - different state groups to resolve. - - Returns: - (dict[(str, str), str], dict[(str, str), set[str]]): - A tuple of (unconflicted_state, conflicted_state), where: - - unconflicted_state is a dict mapping (type, state_key)->event_id - for unconflicted state keys. - - conflicted_state is a dict mapping (type, state_key) to a set of - event ids for conflicted state keys. - """ - state_set_iterator = iter(state_sets) - unconflicted_state = dict(next(state_set_iterator)) - conflicted_state = {} - - for state_set in state_set_iterator: - for key, value in iteritems(state_set): - # Check if there is an unconflicted entry for the state key. - unconflicted_value = unconflicted_state.get(key) - if unconflicted_value is None: - # There isn't an unconflicted entry so check if there is a - # conflicted entry. - ls = conflicted_state.get(key) - if ls is None: - # There wasn't a conflicted entry so haven't seen this key before. - # Therefore it isn't conflicted yet. - unconflicted_state[key] = value - else: - # This key is already conflicted, add our value to the conflict set. - ls.add(value) - elif unconflicted_value != value: - # If the unconflicted value is not the same as our value then we - # have a new conflict. So move the key from the unconflicted_state - # to the conflicted state. - conflicted_state[key] = {value, unconflicted_value} - unconflicted_state.pop(key, None) - - return unconflicted_state, conflicted_state - - -@defer.inlineCallbacks -def resolve_events_with_factory(state_sets, event_map, state_map_factory): - """ - Args: - state_sets(list): List of dicts of (type, state_key) -> event_id, - which are the different state groups to resolve. - - event_map(dict[str,FrozenEvent]|None): - a dict from event_id to event, for any events that we happen to - have in flight (eg, those currently being persisted). 
This will be - used as a starting point fof finding the state we need; any missing - events will be requested via state_map_factory. - - If None, all events will be fetched via state_map_factory. - - state_map_factory(func): will be called - with a list of event_ids that are needed, and should return with - a Deferred of dict of event_id to event. - - Returns - Deferred[dict[(str, str), str]]: - a map from (type, state_key) to event_id. - """ - if len(state_sets) == 1: - defer.returnValue(state_sets[0]) - - unconflicted_state, conflicted_state = _seperate( - state_sets, - ) - - needed_events = set( - event_id - for event_ids in itervalues(conflicted_state) - for event_id in event_ids - ) - if event_map is not None: - needed_events -= set(iterkeys(event_map)) - - logger.info("Asking for %d conflicted events", len(needed_events)) - - # dict[str, FrozenEvent]: a map from state event id to event. Only includes - # the state events which are in conflict (and those in event_map) - state_map = yield state_map_factory(needed_events) - if event_map is not None: - state_map.update(event_map) - - # get the ids of the auth events which allow us to authenticate the - # conflicted state, picking only from the unconflicting state. - # - # dict[(str, str), str]: a map from state key to event id - auth_events = _create_auth_events_from_maps( - unconflicted_state, conflicted_state, state_map - ) - - new_needed_events = set(itervalues(auth_events)) - new_needed_events -= needed_events - if event_map is not None: - new_needed_events -= set(iterkeys(event_map)) - - logger.info("Asking for %d auth events", len(new_needed_events)) - - state_map_new = yield state_map_factory(new_needed_events) - state_map.update(state_map_new) - - defer.returnValue(_resolve_with_state( - unconflicted_state, conflicted_state, auth_events, state_map - )) - - -def _create_auth_events_from_maps(unconflicted_state, conflicted_state, state_map): - auth_events = {} - for event_ids in itervalues(conflicted_state): - for event_id in event_ids: - if event_id in state_map: - keys = event_auth.auth_types_for_event(state_map[event_id]) - for key in keys: - if key not in auth_events: - event_id = unconflicted_state.get(key, None) - if event_id: - auth_events[key] = event_id - return auth_events - - -def _resolve_with_state(unconflicted_state_ids, conflicted_state_ids, auth_event_ids, - state_map): - conflicted_state = {} - for key, event_ids in iteritems(conflicted_state_ids): - events = [state_map[ev_id] for ev_id in event_ids if ev_id in state_map] - if len(events) > 1: - conflicted_state[key] = events - elif len(events) == 1: - unconflicted_state_ids[key] = events[0].event_id - - auth_events = { - key: state_map[ev_id] - for key, ev_id in iteritems(auth_event_ids) - if ev_id in state_map - } - - try: - resolved_state = _resolve_state_events( - conflicted_state, auth_events - ) - except Exception: - logger.exception("Failed to resolve state") - raise - - new_state = unconflicted_state_ids - for key, event in iteritems(resolved_state): - new_state[key] = event.event_id - - return new_state - - -def _resolve_state_events(conflicted_state, auth_events): - """ This is where we actually decide which of the conflicted state to - use. - - We resolve conflicts in the following order: - 1. power levels - 2. join rules - 3. memberships - 4. other events. 
- """ - resolved_state = {} - if POWER_KEY in conflicted_state: - events = conflicted_state[POWER_KEY] - logger.debug("Resolving conflicted power levels %r", events) - resolved_state[POWER_KEY] = _resolve_auth_events( - events, auth_events) - - auth_events.update(resolved_state) - - for key, events in iteritems(conflicted_state): - if key[0] == EventTypes.JoinRules: - logger.debug("Resolving conflicted join rules %r", events) - resolved_state[key] = _resolve_auth_events( - events, - auth_events - ) - - auth_events.update(resolved_state) - - for key, events in iteritems(conflicted_state): - if key[0] == EventTypes.Member: - logger.debug("Resolving conflicted member lists %r", events) - resolved_state[key] = _resolve_auth_events( - events, - auth_events - ) - - auth_events.update(resolved_state) - - for key, events in iteritems(conflicted_state): - if key not in resolved_state: - logger.debug("Resolving conflicted state %r:%r", key, events) - resolved_state[key] = _resolve_normal_events( - events, auth_events - ) - - return resolved_state - - -def _resolve_auth_events(events, auth_events): - reverse = [i for i in reversed(_ordered_events(events))] - - auth_keys = set( - key - for event in events - for key in event_auth.auth_types_for_event(event) - ) - - new_auth_events = {} - for key in auth_keys: - auth_event = auth_events.get(key, None) - if auth_event: - new_auth_events[key] = auth_event - - auth_events = new_auth_events - - prev_event = reverse[0] - for event in reverse[1:]: - auth_events[(prev_event.type, prev_event.state_key)] = prev_event - try: - # The signatures have already been checked at this point - event_auth.check(event, auth_events, do_sig_check=False, do_size_check=False) - prev_event = event - except AuthError: - return prev_event - - return event - - -def _resolve_normal_events(events, auth_events): - for event in _ordered_events(events): - try: - # The signatures have already been checked at this point - event_auth.check(event, auth_events, do_sig_check=False, do_size_check=False) - return event - except AuthError: - pass - - # Use the last event (the one with the least depth) if they all fail - # the auth check. - return event diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py new file mode 100644 index 0000000000..8c091d07c9 --- /dev/null +++ b/synapse/state/__init__.py @@ -0,0 +1,574 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
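# Sketch of the ordering used by _resolve_auth_events and
# _resolve_normal_events above, on stub events: deepest events sort first,
# with the SHA-1 of the event_id as a deterministic tie-breaker.
import hashlib
from collections import namedtuple

_Stub = namedtuple("_Stub", ["depth", "event_id"])

def key_func(e):
    return -int(e.depth), hashlib.sha1(e.event_id.encode("ascii")).hexdigest()

events = [_Stub(1, "$a"), _Stub(5, "$b"), _Stub(5, "$c")]
ordered = sorted(events, key=key_func)
assert ordered[0].depth == 5         # deepest first
assert ordered[-1].event_id == "$a"  # shallowest last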
+ +import logging +from collections import namedtuple + +from six import iteritems, itervalues + +from frozendict import frozendict + +from twisted.internet import defer + +from synapse.api.constants import EventTypes +from synapse.events.snapshot import EventContext +from synapse.util.async import Linearizer +from synapse.util.caches import CACHE_SIZE_FACTOR +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.logutils import log_function +from synapse.util.metrics import Measure + +from .v1 import resolve_events_with_factory, resolve_events_with_state_map + +logger = logging.getLogger(__name__) + + +KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key")) + + +SIZE_OF_CACHE = int(100000 * CACHE_SIZE_FACTOR) +EVICTION_TIMEOUT_SECONDS = 60 * 60 + + +_NEXT_STATE_ID = 1 + +POWER_KEY = (EventTypes.PowerLevels, "") + + +def _gen_state_id(): + global _NEXT_STATE_ID + s = "X%d" % (_NEXT_STATE_ID,) + _NEXT_STATE_ID += 1 + return s + + +class _StateCacheEntry(object): + __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] + + def __init__(self, state, state_group, prev_group=None, delta_ids=None): + # dict[(str, str), str] map from (type, state_key) to event_id + self.state = frozendict(state) + + # the ID of a state group if one and only one is involved. + # otherwise, None otherwise? + self.state_group = state_group + + self.prev_group = prev_group + self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None + + # The `state_id` is a unique ID we generate that can be used as ID for + # this collection of state. Usually this would be the same as the + # state group, but on worker instances we can't generate a new state + # group each time we resolve state, so we generate a separate one that + # isn't persisted and is used solely for caches. + # `state_id` is either a state_group (and so an int) or a string. This + # ensures we don't accidentally persist a state_id as a stateg_group + if state_group: + self.state_id = state_group + else: + self.state_id = _gen_state_id() + + def __len__(self): + return len(self.state) + + +class StateHandler(object): + """Fetches bits of state from the stores, and does state resolution + where necessary + """ + + def __init__(self, hs): + self.clock = hs.get_clock() + self.store = hs.get_datastore() + self.hs = hs + self._state_resolution_handler = hs.get_state_resolution_handler() + + def start_caching(self): + # TODO: remove this shim + self._state_resolution_handler.start_caching() + + @defer.inlineCallbacks + def get_current_state(self, room_id, event_type=None, state_key="", + latest_event_ids=None): + """ Retrieves the current state for the room. This is done by + calling `get_latest_events_in_room` to get the leading edges of the + event graph and then resolving any of the state conflicts. + + This is equivalent to getting the state of an event that were to send + next before receiving any new events. + + If `event_type` is specified, then the method returns only the one + event (or None) with that `event_type` and `state_key`. 
+ + Returns: + map from (type, state_key) to event + """ + if not latest_event_ids: + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + + logger.debug("calling resolve_state_groups from get_current_state") + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + state = ret.state + + if event_type: + event_id = state.get((event_type, state_key)) + event = None + if event_id: + event = yield self.store.get_event(event_id, allow_none=True) + defer.returnValue(event) + return + + state_map = yield self.store.get_events(list(state.values()), + get_prev_content=False) + state = { + key: state_map[e_id] for key, e_id in iteritems(state) if e_id in state_map + } + + defer.returnValue(state) + + @defer.inlineCallbacks + def get_current_state_ids(self, room_id, latest_event_ids=None): + """Get the current state, or the state at a set of events, for a room + + Args: + room_id (str): + + latest_event_ids (iterable[str]|None): if given, the forward + extremities to resolve. If None, we look them up from the + database (via a cache) + + Returns: + Deferred[dict[(str, str), str)]]: the state dict, mapping from + (event_type, state_key) -> event_id + """ + if not latest_event_ids: + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + + logger.debug("calling resolve_state_groups from get_current_state_ids") + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + state = ret.state + + defer.returnValue(state) + + @defer.inlineCallbacks + def get_current_user_in_room(self, room_id, latest_event_ids=None): + if not latest_event_ids: + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.debug("calling resolve_state_groups from get_current_user_in_room") + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + joined_users = yield self.store.get_joined_users_from_state(room_id, entry) + defer.returnValue(joined_users) + + @defer.inlineCallbacks + def get_current_hosts_in_room(self, room_id, latest_event_ids=None): + if not latest_event_ids: + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.debug("calling resolve_state_groups from get_current_hosts_in_room") + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + joined_hosts = yield self.store.get_joined_hosts(room_id, entry) + defer.returnValue(joined_hosts) + + @defer.inlineCallbacks + def compute_event_context(self, event, old_state=None): + """Build an EventContext structure for the event. + + This works out what the current state should be for the event, and + generates a new state group if necessary. + + Args: + event (synapse.events.EventBase): + old_state (dict|None): The state at the event if it can't be + calculated from existing events. This is normally only specified + when receiving an event from federation where we don't have the + prev events for, e.g. when backfilling. + Returns: + synapse.events.snapshot.EventContext: + """ + + if event.internal_metadata.is_outlier(): + # If this is an outlier, then we know it shouldn't have any current + # state. Certainly store.get_current_state won't return any, and + # persisting the event won't store the state group. 
+ if old_state: + prev_state_ids = { + (s.type, s.state_key): s.event_id for s in old_state + } + if event.is_state(): + current_state_ids = dict(prev_state_ids) + key = (event.type, event.state_key) + current_state_ids[key] = event.event_id + else: + current_state_ids = prev_state_ids + else: + current_state_ids = {} + prev_state_ids = {} + + # We don't store state for outliers, so we don't generate a state + # group for it. + context = EventContext.with_state( + state_group=None, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + ) + + defer.returnValue(context) + + if old_state: + # We already have the state, so we don't need to calculate it. + # Let's just correctly fill out the context and create a + # new state group for it. + + prev_state_ids = { + (s.type, s.state_key): s.event_id for s in old_state + } + + if event.is_state(): + key = (event.type, event.state_key) + if key in prev_state_ids: + replaces = prev_state_ids[key] + if replaces != event.event_id: # Paranoia check + event.unsigned["replaces_state"] = replaces + current_state_ids = dict(prev_state_ids) + current_state_ids[key] = event.event_id + else: + current_state_ids = prev_state_ids + + state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=current_state_ids, + ) + + context = EventContext.with_state( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + ) + + defer.returnValue(context) + + logger.debug("calling resolve_state_groups from compute_event_context") + entry = yield self.resolve_state_groups_for_events( + event.room_id, [e for e, _ in event.prev_events], + ) + + prev_state_ids = entry.state + prev_group = None + delta_ids = None + + if event.is_state(): + # If this is a state event then we need to create a new state + # group for the state after this event. + + key = (event.type, event.state_key) + if key in prev_state_ids: + replaces = prev_state_ids[key] + event.unsigned["replaces_state"] = replaces + + current_state_ids = dict(prev_state_ids) + current_state_ids[key] = event.event_id + + if entry.state_group: + # If the state at the event has a state group assigned then + # we can use that as the prev group + prev_group = entry.state_group + delta_ids = { + key: event.event_id + } + elif entry.prev_group: + # If the state at the event only has a prev group, then we can + # use that as a prev group too. 
+ prev_group = entry.prev_group + delta_ids = dict(entry.delta_ids) + delta_ids[key] = event.event_id + + state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=prev_group, + delta_ids=delta_ids, + current_state_ids=current_state_ids, + ) + else: + current_state_ids = prev_state_ids + prev_group = entry.prev_group + delta_ids = entry.delta_ids + + if entry.state_group is None: + entry.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=entry.prev_group, + delta_ids=entry.delta_ids, + current_state_ids=current_state_ids, + ) + entry.state_id = entry.state_group + + state_group = entry.state_group + + context = EventContext.with_state( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + prev_group=prev_group, + delta_ids=delta_ids, + ) + + defer.returnValue(context) + + @defer.inlineCallbacks + def resolve_state_groups_for_events(self, room_id, event_ids): + """ Given a list of event_ids this method fetches the state at each + event, resolves conflicts between them and returns them. + + Args: + room_id (str): + event_ids (list[str]): + + Returns: + Deferred[_StateCacheEntry]: resolved state + """ + logger.debug("resolve_state_groups event_ids %s", event_ids) + + # map from state group id to the state in that state group (where + # 'state' is a map from state key to event id) + # dict[int, dict[(str, str), str]] + state_groups_ids = yield self.store.get_state_groups_ids( + room_id, event_ids + ) + + if len(state_groups_ids) == 1: + name, state_list = list(state_groups_ids.items()).pop() + + prev_group, delta_ids = yield self.store.get_state_group_delta(name) + + defer.returnValue(_StateCacheEntry( + state=state_list, + state_group=name, + prev_group=prev_group, + delta_ids=delta_ids, + )) + + result = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups_ids, None, self._state_map_factory, + ) + defer.returnValue(result) + + def _state_map_factory(self, ev_ids): + return self.store.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + + def resolve_events(self, state_sets, event): + logger.info( + "Resolving state for %s with %d groups", event.room_id, len(state_sets) + ) + state_set_ids = [{ + (ev.type, ev.state_key): ev.event_id + for ev in st + } for st in state_sets] + + state_map = { + ev.event_id: ev + for st in state_sets + for ev in st + } + + with Measure(self.clock, "state._resolve_events"): + new_state = resolve_events_with_state_map(state_set_ids, state_map) + + new_state = { + key: state_map[ev_id] for key, ev_id in iteritems(new_state) + } + + return new_state + + +class StateResolutionHandler(object): + """Responsible for doing state conflict resolution. + + Note that the storage layer depends on this handler, so all functions must + be storage-independent. + """ + def __init__(self, hs): + self.clock = hs.get_clock() + + # dict of set of event_ids -> _StateCacheEntry. 
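# Sketch of how the cache below is keyed (group ids invented):
# resolve_state_groups uses the frozenset of the input state group ids, so
# the same combination of groups hits the same entry whatever the
# iteration order.
cache = {}
group_names = frozenset([4, 7, 9])
cache[group_names] = "cached _StateCacheEntry"
assert cache[frozenset([9, 7, 4])] == "cached _StateCacheEntry"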
+ self._state_cache = None + self.resolve_linearizer = Linearizer(name="state_resolve_lock") + + def start_caching(self): + logger.debug("start_caching") + + self._state_cache = ExpiringCache( + cache_name="state_cache", + clock=self.clock, + max_len=SIZE_OF_CACHE, + expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, + iterable=True, + reset_expiry_on_get=True, + ) + + self._state_cache.start() + + @defer.inlineCallbacks + @log_function + def resolve_state_groups( + self, room_id, state_groups_ids, event_map, state_map_factory, + ): + """Resolves conflicts between a set of state groups + + Always generates a new state group (unless we hit the cache), so should + not be called for a single state group + + Args: + room_id (str): room we are resolving for (used for logging) + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + + Returns: + Deferred[_StateCacheEntry]: resolved state + """ + logger.debug( + "resolve_state_groups state_groups %s", + state_groups_ids.keys() + ) + + group_names = frozenset(state_groups_ids.keys()) + + with (yield self.resolve_linearizer.queue(group_names)): + if self._state_cache is not None: + cache = self._state_cache.get(group_names, None) + if cache: + defer.returnValue(cache) + + logger.info( + "Resolving state for %s with %d groups", room_id, len(state_groups_ids) + ) + + # start by assuming we won't have any conflicted state, and build up the new + # state map by iterating through the state groups. If we discover a conflict, + # we give up and instead use `resolve_events_with_factory`. + # + # XXX: is this actually worthwhile, or should we just let + # resolve_events_with_factory do it? + new_state = {} + conflicted_state = False + for st in itervalues(state_groups_ids): + for key, e_id in iteritems(st): + if key in new_state: + conflicted_state = True + break + new_state[key] = e_id + if conflicted_state: + break + + if conflicted_state: + logger.info("Resolving conflicted state for %r", room_id) + with Measure(self.clock, "state._resolve_events"): + new_state = yield resolve_events_with_factory( + list(itervalues(state_groups_ids)), + event_map=event_map, + state_map_factory=state_map_factory, + ) + + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. + + with Measure(self.clock, "state.create_group_ids"): + cache = _make_state_cache_entry(new_state, state_groups_ids) + + if self._state_cache is not None: + self._state_cache[group_names] = cache + + defer.returnValue(cache) + + +def _make_state_cache_entry( + new_state, + state_groups_ids, +): + """Given a resolved state, and a set of input state groups, pick one to base + a new state group on (if any), and return an appropriately-constructed + _StateCacheEntry. 
+ + Args: + new_state (dict[(str, str), str]): resolved state map (mapping from + (type, state_key) to event_id) + + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + Returns: + _StateCacheEntry + """ + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. + + # first look for exact matches + new_state_event_ids = set(itervalues(new_state)) + for sg, state in iteritems(state_groups_ids): + if len(new_state_event_ids) != len(state): + continue + + old_state_event_ids = set(itervalues(state)) + if new_state_event_ids == old_state_event_ids: + # got an exact match. + return _StateCacheEntry( + state=new_state, + state_group=sg, + ) + + # TODO: We want to create a state group for this set of events, to + # increase cache hits, but we need to make sure that it doesn't + # end up as a prev_group without being added to the database + + # failing that, look for the closest match. + prev_group = None + delta_ids = None + + for old_group, old_state in iteritems(state_groups_ids): + n_delta_ids = { + k: v + for k, v in iteritems(new_state) + if old_state.get(k) != v + } + if not delta_ids or len(n_delta_ids) < len(delta_ids): + prev_group = old_group + delta_ids = n_delta_ids + + return _StateCacheEntry( + state=new_state, + state_group=None, + prev_group=prev_group, + delta_ids=delta_ids, + ) diff --git a/synapse/state/v1.py b/synapse/state/v1.py new file mode 100644 index 0000000000..3a1f7054a1 --- /dev/null +++ b/synapse/state/v1.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import logging + +from six import iteritems, iterkeys, itervalues + +from twisted.internet import defer + +from synapse import event_auth +from synapse.api.constants import EventTypes +from synapse.api.errors import AuthError + +logger = logging.getLogger(__name__) + + +POWER_KEY = (EventTypes.PowerLevels, "") + + +def resolve_events_with_state_map(state_sets, state_map): + """ + Args: + state_sets(list): List of dicts of (type, state_key) -> event_id, + which are the different state groups to resolve. + state_map(dict): a dict from event_id to event, for all events in + state_sets. + + Returns + dict[(str, str), str]: + a map from (type, state_key) to event_id. 
+ """ + if len(state_sets) == 1: + return state_sets[0] + + unconflicted_state, conflicted_state = _seperate( + state_sets, + ) + + auth_events = _create_auth_events_from_maps( + unconflicted_state, conflicted_state, state_map + ) + + return _resolve_with_state( + unconflicted_state, conflicted_state, auth_events, state_map + ) + + +@defer.inlineCallbacks +def resolve_events_with_factory(state_sets, event_map, state_map_factory): + """ + Args: + state_sets(list): List of dicts of (type, state_key) -> event_id, + which are the different state groups to resolve. + + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + + state_map_factory(func): will be called + with a list of event_ids that are needed, and should return with + a Deferred of dict of event_id to event. + + Returns + Deferred[dict[(str, str), str]]: + a map from (type, state_key) to event_id. + """ + if len(state_sets) == 1: + defer.returnValue(state_sets[0]) + + unconflicted_state, conflicted_state = _seperate( + state_sets, + ) + + needed_events = set( + event_id + for event_ids in itervalues(conflicted_state) + for event_id in event_ids + ) + if event_map is not None: + needed_events -= set(iterkeys(event_map)) + + logger.info("Asking for %d conflicted events", len(needed_events)) + + # dict[str, FrozenEvent]: a map from state event id to event. Only includes + # the state events which are in conflict (and those in event_map) + state_map = yield state_map_factory(needed_events) + if event_map is not None: + state_map.update(event_map) + + # get the ids of the auth events which allow us to authenticate the + # conflicted state, picking only from the unconflicting state. + # + # dict[(str, str), str]: a map from state key to event id + auth_events = _create_auth_events_from_maps( + unconflicted_state, conflicted_state, state_map + ) + + new_needed_events = set(itervalues(auth_events)) + new_needed_events -= needed_events + if event_map is not None: + new_needed_events -= set(iterkeys(event_map)) + + logger.info("Asking for %d auth events", len(new_needed_events)) + + state_map_new = yield state_map_factory(new_needed_events) + state_map.update(state_map_new) + + defer.returnValue(_resolve_with_state( + unconflicted_state, conflicted_state, auth_events, state_map + )) + + +def _seperate(state_sets): + """Takes the state_sets and figures out which keys are conflicted and + which aren't. i.e., which have multiple different event_ids associated + with them in different state sets. + + Args: + state_sets(iterable[dict[(str, str), str]]): + List of dicts of (type, state_key) -> event_id, which are the + different state groups to resolve. + + Returns: + (dict[(str, str), str], dict[(str, str), set[str]]): + A tuple of (unconflicted_state, conflicted_state), where: + + unconflicted_state is a dict mapping (type, state_key)->event_id + for unconflicted state keys. + + conflicted_state is a dict mapping (type, state_key) to a set of + event ids for conflicted state keys. + """ + state_set_iterator = iter(state_sets) + unconflicted_state = dict(next(state_set_iterator)) + conflicted_state = {} + + for state_set in state_set_iterator: + for key, value in iteritems(state_set): + # Check if there is an unconflicted entry for the state key. 
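# Worked example of the partition this loop computes (invented ids, and a
# simplified reimplementation that matches _seperate when every state set
# carries the same keys, as here):
sets = [
    {("m.room.name", ""): "$n1", ("m.room.topic", ""): "$t1"},
    {("m.room.name", ""): "$n2", ("m.room.topic", ""): "$t1"},
]
all_keys = set().union(*sets)
unconflicted = {k: sets[0][k] for k in all_keys
                if len({s.get(k) for s in sets}) == 1}
conflicted = {k: {s[k] for s in sets if k in s}
              for k in all_keys if k not in unconflicted}
assert unconflicted == {("m.room.topic", ""): "$t1"}
assert conflicted == {("m.room.name", ""): {"$n1", "$n2"}}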
+ unconflicted_value = unconflicted_state.get(key) + if unconflicted_value is None: + # There isn't an unconflicted entry so check if there is a + # conflicted entry. + ls = conflicted_state.get(key) + if ls is None: + # There wasn't a conflicted entry so haven't seen this key before. + # Therefore it isn't conflicted yet. + unconflicted_state[key] = value + else: + # This key is already conflicted, add our value to the conflict set. + ls.add(value) + elif unconflicted_value != value: + # If the unconflicted value is not the same as our value then we + # have a new conflict. So move the key from the unconflicted_state + # to the conflicted state. + conflicted_state[key] = {value, unconflicted_value} + unconflicted_state.pop(key, None) + + return unconflicted_state, conflicted_state + + +def _create_auth_events_from_maps(unconflicted_state, conflicted_state, state_map): + auth_events = {} + for event_ids in itervalues(conflicted_state): + for event_id in event_ids: + if event_id in state_map: + keys = event_auth.auth_types_for_event(state_map[event_id]) + for key in keys: + if key not in auth_events: + event_id = unconflicted_state.get(key, None) + if event_id: + auth_events[key] = event_id + return auth_events + + +def _resolve_with_state(unconflicted_state_ids, conflicted_state_ids, auth_event_ids, + state_map): + conflicted_state = {} + for key, event_ids in iteritems(conflicted_state_ids): + events = [state_map[ev_id] for ev_id in event_ids if ev_id in state_map] + if len(events) > 1: + conflicted_state[key] = events + elif len(events) == 1: + unconflicted_state_ids[key] = events[0].event_id + + auth_events = { + key: state_map[ev_id] + for key, ev_id in iteritems(auth_event_ids) + if ev_id in state_map + } + + try: + resolved_state = _resolve_state_events( + conflicted_state, auth_events + ) + except Exception: + logger.exception("Failed to resolve state") + raise + + new_state = unconflicted_state_ids + for key, event in iteritems(resolved_state): + new_state[key] = event.event_id + + return new_state + + +def _resolve_state_events(conflicted_state, auth_events): + """ This is where we actually decide which of the conflicted state to + use. + + We resolve conflicts in the following order: + 1. power levels + 2. join rules + 3. memberships + 4. other events. 
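# Minimal sketch of the precedence listed above (standard Matrix event
# types; the helper and ids are invented): conflicts are settled category
# by category, each round feeding the auth_events used for the next.
PRECEDENCE = ("m.room.power_levels", "m.room.join_rules", "m.room.member")

def resolution_order(conflicted_keys):
    # Yields keys in the order _resolve_state_events considers them.
    for wanted in PRECEDENCE:
        for key in conflicted_keys:
            if key[0] == wanted:
                yield key
    for key in conflicted_keys:
        if key[0] not in PRECEDENCE:
            yield key

keys = [("m.room.name", ""), ("m.room.member", "@a:hs"),
        ("m.room.power_levels", "")]
assert next(resolution_order(keys)) == ("m.room.power_levels", "")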
+ """ + resolved_state = {} + if POWER_KEY in conflicted_state: + events = conflicted_state[POWER_KEY] + logger.debug("Resolving conflicted power levels %r", events) + resolved_state[POWER_KEY] = _resolve_auth_events( + events, auth_events) + + auth_events.update(resolved_state) + + for key, events in iteritems(conflicted_state): + if key[0] == EventTypes.JoinRules: + logger.debug("Resolving conflicted join rules %r", events) + resolved_state[key] = _resolve_auth_events( + events, + auth_events + ) + + auth_events.update(resolved_state) + + for key, events in iteritems(conflicted_state): + if key[0] == EventTypes.Member: + logger.debug("Resolving conflicted member lists %r", events) + resolved_state[key] = _resolve_auth_events( + events, + auth_events + ) + + auth_events.update(resolved_state) + + for key, events in iteritems(conflicted_state): + if key not in resolved_state: + logger.debug("Resolving conflicted state %r:%r", key, events) + resolved_state[key] = _resolve_normal_events( + events, auth_events + ) + + return resolved_state + + +def _resolve_auth_events(events, auth_events): + reverse = [i for i in reversed(_ordered_events(events))] + + auth_keys = set( + key + for event in events + for key in event_auth.auth_types_for_event(event) + ) + + new_auth_events = {} + for key in auth_keys: + auth_event = auth_events.get(key, None) + if auth_event: + new_auth_events[key] = auth_event + + auth_events = new_auth_events + + prev_event = reverse[0] + for event in reverse[1:]: + auth_events[(prev_event.type, prev_event.state_key)] = prev_event + try: + # The signatures have already been checked at this point + event_auth.check(event, auth_events, do_sig_check=False, do_size_check=False) + prev_event = event + except AuthError: + return prev_event + + return event + + +def _resolve_normal_events(events, auth_events): + for event in _ordered_events(events): + try: + # The signatures have already been checked at this point + event_auth.check(event, auth_events, do_sig_check=False, do_size_check=False) + return event + except AuthError: + pass + + # Use the last event (the one with the least depth) if they all fail + # the auth check. + return event + + +def _ordered_events(events): + def key_func(e): + return -int(e.depth), hashlib.sha1(e.event_id.encode('ascii')).hexdigest() + + return sorted(events, key=key_func) -- cgit 1.5.1 From 152c0aa58edd1453f6c8c16a82cdb92165d0adba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Aug 2018 15:27:19 +0100 Subject: Add constants for room versions --- synapse/api/constants.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index b0da506f6d..912bf024bf 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -97,9 +97,14 @@ class ThirdPartyEntityKind(object): LOCATION = "location" +class RoomVersions(object): + V1 = "1" + VDH_TEST = "vdh-test-version" + + # the version we will give rooms which are created on this server -DEFAULT_ROOM_VERSION = "1" +DEFAULT_ROOM_VERSION = RoomVersions.V1 # vdh-test-version is a placeholder to get room versioning support working and tested # until we have a working v2. 
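# Sketch of how the constants above are meant to be used (the validation
# helper is invented): reject rooms whose version this server does not
# implement.
class RoomVersions(object):
    V1 = "1"
    VDH_TEST = "vdh-test-version"

KNOWN_ROOM_VERSIONS = {RoomVersions.V1, RoomVersions.VDH_TEST}

def check_room_version(version):
    if version not in KNOWN_ROOM_VERSIONS:
        raise ValueError("Unknown room version: %r" % (version,))

check_room_version(RoomVersions.V1)  # ok
# check_room_version("99")           # would raise ValueError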
-KNOWN_ROOM_VERSIONS = {"1", "vdh-test-version"} +KNOWN_ROOM_VERSIONS = {RoomVersions.V1, RoomVersions.VDH_TEST} -- cgit 1.5.1 From ce6db0e5473e31292567b11599eb334c3275c564 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Aug 2018 17:01:57 +0100 Subject: Choose state algorithm based on room version --- synapse/handlers/federation.py | 8 +++- synapse/handlers/room_member.py | 5 +- synapse/state/__init__.py | 104 +++++++++++++++++++++++++++++++++++----- synapse/storage/events.py | 4 +- 4 files changed, 105 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0dffd44e22..75a819dd11 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -274,8 +274,9 @@ class FederationHandler(BaseHandler): ev_ids, get_prev_content=False, check_redacted=False ) + room_version = yield self.store.get_room_version(pdu.room_id) state_map = yield resolve_events_with_factory( - state_groups, {pdu.event_id: pdu}, fetch + room_version, state_groups, {pdu.event_id: pdu}, fetch ) state = (yield self.store.get_events(state_map.values())).values() @@ -1811,7 +1812,10 @@ class FederationHandler(BaseHandler): (d.type, d.state_key): d for d in different_events if d }) - new_state = self.state_handler.resolve_events( + room_version = yield self.store.get_room_version(event.room_id) + + new_state = yield self.state_handler.resolve_events( + room_version, [list(local_view.values()), list(remote_view.values())], event ) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 0d4a3f4677..b6010bb41e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -341,9 +341,10 @@ class RoomMemberHandler(object): prev_events_and_hashes = yield self.store.get_prev_events_for_room( room_id, ) - latest_event_ids = ( + latest_event_ids = [ event_id for (event_id, _, _) in prev_events_and_hashes - ) + ] + current_state_ids = yield self.state_handler.get_current_state_ids( room_id, latest_event_ids=latest_event_ids, ) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 8c091d07c9..222daa0b28 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -23,16 +23,15 @@ from frozendict import frozendict from twisted.internet import defer -from synapse.api.constants import EventTypes +from synapse.api.constants import EventTypes, RoomVersions from synapse.events.snapshot import EventContext +from synapse.state import v1 from synapse.util.async import Linearizer from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logutils import log_function from synapse.util.metrics import Measure -from .v1 import resolve_events_with_factory, resolve_events_with_state_map - logger = logging.getLogger(__name__) @@ -263,8 +262,14 @@ class StateHandler(object): defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") + if event.type == EventTypes.Create: + room_version = event.content.get("room_version", RoomVersions.V1) + else: + room_version = None + entry = yield self.resolve_state_groups_for_events( event.room_id, [e for e, _ in event.prev_events], + explicit_room_version=room_version, ) prev_state_ids = entry.state @@ -332,13 +337,17 @@ class StateHandler(object): defer.returnValue(context) @defer.inlineCallbacks - def resolve_state_groups_for_events(self, room_id, event_ids): + def resolve_state_groups_for_events(self, room_id, event_ids, + 
explicit_room_version=None):
         """ Given a list of event_ids this method fetches the state at each
         event, resolves conflicts between them and returns them.

         Args:
-            room_id (str):
-            event_ids (list[str]):
+            room_id (str)
+            event_ids (list[str])
+            explicit_room_version (str|None): If set, uses the given room
+                version to choose the resolution algorithm. If None, then
+                checks the database for the room version.

         Returns:
             Deferred[_StateCacheEntry]: resolved state
@@ -364,8 +373,13 @@ class StateHandler(object):
                 delta_ids=delta_ids,
             ))

+        room_version = explicit_room_version
+        if not room_version:
+            room_version = yield self.store.get_room_version(room_id)
+
         result = yield self._state_resolution_handler.resolve_state_groups(
-            room_id, state_groups_ids, None, self._state_map_factory,
+            room_id, room_version, state_groups_ids, None,
+            self._state_map_factory,
         )
         defer.returnValue(result)

@@ -374,7 +388,8 @@ class StateHandler(object):
             ev_ids, get_prev_content=False, check_redacted=False,
         )

-    def resolve_events(self, state_sets, event):
+    @defer.inlineCallbacks
+    def resolve_events(self, room_version, state_sets, event):
         logger.info(
             "Resolving state for %s with %d groups", event.room_id, len(state_sets)
         )
@@ -389,14 +404,18 @@
             for ev in st
         }

+        room_version = yield self.store.get_room_version(event.room_id)
+
         with Measure(self.clock, "state._resolve_events"):
-            new_state = resolve_events_with_state_map(state_set_ids, state_map)
+            new_state = resolve_events_with_state_map(
+                room_version, state_set_ids, state_map,
+            )

         new_state = {
             key: state_map[ev_id] for key, ev_id in iteritems(new_state)
         }

-        return new_state
+        defer.returnValue(new_state)


 class StateResolutionHandler(object):
@@ -429,7 +448,7 @@ class StateResolutionHandler(object):
     @defer.inlineCallbacks
     @log_function
     def resolve_state_groups(
-        self, room_id, state_groups_ids, event_map, state_map_factory,
+        self, room_id, room_version, state_groups_ids, event_map, state_map_factory,
     ):
         """Resolves conflicts between a set of state groups

@@ -438,6 +457,7 @@

         Args:
             room_id (str): room we are resolving for (used for logging)
+            room_version (str): version of the room
             state_groups_ids (dict[int, dict[(str, str), str]]):
                 map from state group id to the state in that state group
                 (where 'state' is a map from state key to event id)
@@ -491,6 +511,7 @@
                 logger.info("Resolving conflicted state for %r", room_id)
                 with Measure(self.clock, "state._resolve_events"):
                     new_state = yield resolve_events_with_factory(
+                        room_version,
                         list(itervalues(state_groups_ids)),
                         event_map=event_map,
                         state_map_factory=state_map_factory,
@@ -572,3 +593,64 @@ def _make_state_cache_entry(
         prev_group=prev_group,
         delta_ids=delta_ids,
     )
+
+
+def resolve_events_with_state_map(room_version, state_sets, state_map):
+    """
+    Args:
+        room_version(str): Version of the room
+        state_sets(list): List of dicts of (type, state_key) -> event_id,
+            which are the different state groups to resolve.
+        state_map(dict): a dict from event_id to event, for all events in
+            state_sets.
+
+    Returns
+        dict[(str, str), str]:
+            a map from (type, state_key) to event_id.
+    """
+    if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
+        return v1.resolve_events_with_state_map(
+            state_sets, state_map,
+        )
+    else:
+        # This should only happen if we added a version but forgot to add it to
+        # the list above.
+        raise Exception(
+            "No state resolution algorithm defined for version %r" % (room_version,)
+        )
+
+
+def resolve_events_with_factory(room_version, state_sets, event_map, state_map_factory):
+    """
+    Args:
+        room_version(str): Version of the room
+
+        state_sets(list): List of dicts of (type, state_key) -> event_id,
+            which are the different state groups to resolve.
+
+        event_map(dict[str,FrozenEvent]|None):
+            a dict from event_id to event, for any events that we happen to
+            have in flight (eg, those currently being persisted). This will be
+            used as a starting point for finding the state we need; any missing
+            events will be requested via state_map_factory.
+
+            If None, all events will be fetched via state_map_factory.
+
+        state_map_factory(func): will be called
+            with a list of event_ids that are needed, and should return with
+            a Deferred of dict of event_id to event.
+
+    Returns
+        Deferred[dict[(str, str), str]]:
+            a map from (type, state_key) to event_id.
+    """
+    if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
+        return v1.resolve_events_with_factory(
+            state_sets, event_map, state_map_factory,
+        )
+    else:
+        # This should only happen if we added a version but forgot to add it to
+        # the list above.
+        raise Exception(
+            "No state resolution algorithm defined for version %r" % (room_version,)
+        )
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index ce32e8fefd..dc68d365c2 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -705,9 +705,11 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         }
         events_map = {ev.event_id: ev for ev, _ in events_context}

+        room_version = yield self.get_room_version(room_id)
+
         logger.debug("calling resolve_state_groups from preserve_events")
         res = yield self._state_resolution_handler.resolve_state_groups(
-            room_id, state_groups, events_map, get_events
+            room_id, room_version, state_groups, events_map, get_events
         )

         defer.returnValue((res.state, None))
-- cgit 1.5.1


From bb99b1f5507e009bc32a82ba6ef29ba2d91e6b65 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 9 Aug 2018 14:44:10 +0100
Subject: Add fast path in state res for zero prev events

---
 synapse/state/__init__.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 222daa0b28..f1c26d1071 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -361,7 +361,12 @@ class StateHandler(object):
             room_id, event_ids
         )

-        if len(state_groups_ids) == 1:
+        if len(state_groups_ids) == 0:
+            defer.returnValue(_StateCacheEntry(
+                state={},
+                state_group=None,
+            ))
+        elif len(state_groups_ids) == 1:
             name, state_list = list(state_groups_ids.items()).pop()

             prev_group, delta_ids = yield self.store.get_state_group_delta(name)
-- cgit 1.5.1


From 3e19beb941f3f797262b051d47227018898bb36f Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 9 Aug 2018 14:33:49 +0100
Subject: Fix tests

---
 tests/replication/slave/storage/test_events.py |  3 ++-
 tests/storage/test_redaction.py                |  4 +++-
 tests/storage/test_roommember.py               |  4 +++-
 tests/test_state.py                            |  6 +++++-
 tests/test_visibility.py                       |  4 +++-
 tests/utils.py                                 | 30 ++++++++++++++++++++++++++
 6 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index f5b47f5ec0..8c08985fa9 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -122,6 
+122,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): @defer.inlineCallbacks def test_invites(self): + yield self.persist(type="m.room.create", key="", creator=USER_ID) yield self.check("get_invited_rooms_for_user", [USER_ID_2], []) event = yield self.persist( type="m.room.member", key=USER_ID_2, membership="invite" @@ -134,7 +135,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): @defer.inlineCallbacks def test_push_actions_for_user(self): - yield self.persist(type="m.room.create", creator=USER_ID) + yield self.persist(type="m.room.create", key="", creator=USER_ID) yield self.persist(type="m.room.join", key=USER_ID, membership="join") yield self.persist( type="m.room.join", sender=USER_ID, key=USER_ID_2, membership="join" diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 475ec900c4..05f40eaa94 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -22,7 +22,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.types import RoomID, UserID from tests import unittest -from tests.utils import setup_test_homeserver +from tests.utils import create_room, setup_test_homeserver class RedactionTestCase(unittest.TestCase): @@ -43,6 +43,8 @@ class RedactionTestCase(unittest.TestCase): self.room1 = RoomID.from_string("!abc123:test") + yield create_room(hs, self.room1.to_string(), self.u_alice.to_string()) + self.depth = 1 @defer.inlineCallbacks diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index c5fd54f67e..034ea3fbbf 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -22,7 +22,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.types import RoomID, UserID from tests import unittest -from tests.utils import setup_test_homeserver +from tests.utils import create_room, setup_test_homeserver class RoomMemberStoreTestCase(unittest.TestCase): @@ -47,6 +47,8 @@ class RoomMemberStoreTestCase(unittest.TestCase): self.room = RoomID.from_string("!abc123:test") + yield create_room(hs, self.room.to_string(), self.u_alice.to_string()) + @defer.inlineCallbacks def inject_room_member(self, room, user, membership, replaces_state=None): builder = self.event_builder_factory.new({ diff --git a/tests/test_state.py b/tests/test_state.py index 429a18cbf7..770e94437a 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -18,7 +18,7 @@ from mock import Mock from twisted.internet import defer from synapse.api.auth import Auth -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, RoomVersions from synapse.events import FrozenEvent from synapse.state import StateHandler, StateResolutionHandler @@ -108,6 +108,9 @@ class StateGroupStore(object): def register_event_id_state_group(self, event_id, state_group): self._event_to_state_group[event_id] = state_group + def get_room_version(self, room_id): + return RoomVersions.V1 + class DictObj(dict): def __init__(self, **kwargs): @@ -167,6 +170,7 @@ class StateTestCase(unittest.TestCase): "START": DictObj( type=EventTypes.Create, state_key="", + content={}, depth=1, ), "A": DictObj( diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 0dc1a924d3..15ebb0aa08 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -21,7 +21,7 @@ from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server import tests.unittest -from tests.utils import 
setup_test_homeserver +from tests.utils import create_room, setup_test_homeserver logger = logging.getLogger(__name__) @@ -36,6 +36,8 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): self.event_builder_factory = self.hs.get_event_builder_factory() self.store = self.hs.get_datastore() + yield create_room(self.hs, TEST_ROOM_ID, "@someone:ROOM") + @defer.inlineCallbacks def test_filtering(self): # diff --git a/tests/utils.py b/tests/utils.py index 3f17304934..8930a3a230 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -21,6 +21,7 @@ from six.moves.urllib import parse as urlparse from twisted.internet import defer, reactor +from synapse.api.constants import EventTypes from synapse.api.errors import CodeMessageException, cs_error from synapse.federation.transport import server from synapse.http.server import HttpServer @@ -445,3 +446,32 @@ class DeferredMockCallable(object): "call(%s)" % _format_call(c[0], c[1]) for c in calls ]) ) + + +@defer.inlineCallbacks +def create_room(hs, room_id, creator_id): + """Creates and persist a creation event for the given room + + Args: + hs + room_id (str) + creator_id (str) + """ + + store = hs.get_datastore() + event_builder_factory = hs.get_event_builder_factory() + event_creation_handler = hs.get_event_creation_handler() + + builder = event_builder_factory.new({ + "type": EventTypes.Create, + "state_key": "", + "sender": creator_id, + "room_id": room_id, + "content": {}, + }) + + event, context = yield event_creation_handler.create_new_client_event( + builder + ) + + yield store.persist_event(event, context) -- cgit 1.5.1 From 5075e444f413b0b564dbc525a641cb5784acf2ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Aug 2018 14:56:25 +0100 Subject: Newsfile --- changelog.d/3673.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3673.misc diff --git a/changelog.d/3673.misc b/changelog.d/3673.misc new file mode 100644 index 0000000000..d672111fb9 --- /dev/null +++ b/changelog.d/3673.misc @@ -0,0 +1 @@ +Refactor state module to support multiple room versions -- cgit 1.5.1 From e21c368b8b7c62b2f626ff6c739ea33c3c2e73bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 Aug 2018 13:54:51 +0100 Subject: Revert spurious change --- synapse/handlers/room_member.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index b6010bb41e..b8e1af580b 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -341,9 +341,9 @@ class RoomMemberHandler(object): prev_events_and_hashes = yield self.store.get_prev_events_for_room( room_id, ) - latest_event_ids = [ + latest_event_ids = ( event_id for (event_id, _, _) in prev_events_and_hashes - ] + ) current_state_ids = yield self.state_handler.get_current_state_ids( room_id, latest_event_ids=latest_event_ids, -- cgit 1.5.1 From 8dee601054089311b81efd6239b458401da063e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 Aug 2018 14:21:12 +0100 Subject: Remove redundant room_version checks --- synapse/handlers/federation.py | 2 +- synapse/state/__init__.py | 17 +++-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 75a819dd11..14a10c1229 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1814,7 +1814,7 @@ class FederationHandler(BaseHandler): room_version = yield self.store.get_room_version(event.room_id) - new_state = yield 
self.state_handler.resolve_events( + new_state = self.state_handler.resolve_events( room_version, [list(local_view.values()), list(remote_view.values())], event diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index f1c26d1071..a2992fd953 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -262,14 +262,9 @@ class StateHandler(object): defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") - if event.type == EventTypes.Create: - room_version = event.content.get("room_version", RoomVersions.V1) - else: - room_version = None entry = yield self.resolve_state_groups_for_events( event.room_id, [e for e, _ in event.prev_events], - explicit_room_version=room_version, ) prev_state_ids = entry.state @@ -337,8 +332,7 @@ class StateHandler(object): defer.returnValue(context) @defer.inlineCallbacks - def resolve_state_groups_for_events(self, room_id, event_ids, - explicit_room_version=None): + def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -378,9 +372,7 @@ class StateHandler(object): delta_ids=delta_ids, )) - room_version = explicit_room_version - if not room_version: - room_version = yield self.store.get_room_version(room_id) + room_version = yield self.store.get_room_version(room_id) result = yield self._state_resolution_handler.resolve_state_groups( room_id, room_version, state_groups_ids, None, @@ -393,7 +385,6 @@ class StateHandler(object): ev_ids, get_prev_content=False, check_redacted=False, ) - @defer.inlineCallbacks def resolve_events(self, room_version, state_sets, event): logger.info( "Resolving state for %s with %d groups", event.room_id, len(state_sets) @@ -409,8 +400,6 @@ class StateHandler(object): for ev in st } - room_version = yield self.store.get_room_version(event.room_id) - with Measure(self.clock, "state._resolve_events"): new_state = resolve_events_with_state_map( room_version, state_set_ids, state_map, @@ -420,7 +409,7 @@ class StateHandler(object): key: state_map[ev_id] for key, ev_id in iteritems(new_state) } - defer.returnValue(new_state) + return new_state class StateResolutionHandler(object): -- cgit 1.5.1 From 9a2f960736a1d1aeb6c9c094887253b280a59a37 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 21 Aug 2018 00:00:19 +1000 Subject: version --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index a14d578e36..5175a1161d 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,4 @@ """ This is a reference implementation of a Matrix home server. 
""" -__version__ = "0.33.2" +__version__ = "0.33.3rc1" -- cgit 1.5.1 From 80bf7d35802003cee6e495421ec54c1c08bce525 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 21 Aug 2018 00:01:14 +1000 Subject: changelog --- CHANGES.md | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ changelog.d/1491.feature | 1 - changelog.d/3184.feature | 1 - changelog.d/3423.misc | 1 - changelog.d/3568.feature | 1 - changelog.d/3574.feature | 1 - changelog.d/3589.feature | 1 - changelog.d/3632.misc | 1 - changelog.d/3633.feature | 1 - changelog.d/3647.misc | 1 - changelog.d/3653.feature | 1 - changelog.d/3654.feature | 1 - changelog.d/3655.feature | 1 - changelog.d/3658.bugfix | 1 - changelog.d/3660.misc | 1 - changelog.d/3661.bugfix | 1 - changelog.d/3662.feature | 1 - changelog.d/3664.feature | 1 - changelog.d/3668.misc | 1 - changelog.d/3669.misc | 1 - changelog.d/3670.feature | 1 - changelog.d/3676.bugfix | 1 - changelog.d/3677.bugfix | 1 - changelog.d/3678.misc | 1 - changelog.d/3679.misc | 1 - changelog.d/3681.bugfix | 1 - changelog.d/3684.misc | 1 - changelog.d/3687.feature | 1 - changelog.d/3689.bugfix | 1 - changelog.d/3690.misc | 1 - changelog.d/3692.bugfix | 1 - changelog.d/3694.feature | 1 - changelog.d/3700.bugfix | 1 - changelog.d/3701.bugfix | 1 - changelog.d/3703.removal | 1 - changelog.d/3705.bugfix | 1 - changelog.d/3707.misc | 1 - changelog.d/3708.feature | 1 - changelog.d/3709.misc | 1 - changelog.d/3710.bugfix | 1 - changelog.d/3712.misc | 1 - changelog.d/3713.bugfix | 1 - changelog.d/3719.bugfix | 1 - 43 files changed, 64 insertions(+), 42 deletions(-) delete mode 100644 changelog.d/1491.feature delete mode 100644 changelog.d/3184.feature delete mode 100644 changelog.d/3423.misc delete mode 100644 changelog.d/3568.feature delete mode 100644 changelog.d/3574.feature delete mode 100644 changelog.d/3589.feature delete mode 100644 changelog.d/3632.misc delete mode 100644 changelog.d/3633.feature delete mode 100644 changelog.d/3647.misc delete mode 100644 changelog.d/3653.feature delete mode 100644 changelog.d/3654.feature delete mode 100644 changelog.d/3655.feature delete mode 100644 changelog.d/3658.bugfix delete mode 100644 changelog.d/3660.misc delete mode 100644 changelog.d/3661.bugfix delete mode 100644 changelog.d/3662.feature delete mode 100644 changelog.d/3664.feature delete mode 100644 changelog.d/3668.misc delete mode 100644 changelog.d/3669.misc delete mode 100644 changelog.d/3670.feature delete mode 100644 changelog.d/3676.bugfix delete mode 100644 changelog.d/3677.bugfix delete mode 100644 changelog.d/3678.misc delete mode 100644 changelog.d/3679.misc delete mode 100644 changelog.d/3681.bugfix delete mode 100644 changelog.d/3684.misc delete mode 100644 changelog.d/3687.feature delete mode 100644 changelog.d/3689.bugfix delete mode 100644 changelog.d/3690.misc delete mode 100644 changelog.d/3692.bugfix delete mode 100644 changelog.d/3694.feature delete mode 100644 changelog.d/3700.bugfix delete mode 100644 changelog.d/3701.bugfix delete mode 100644 changelog.d/3703.removal delete mode 100644 changelog.d/3705.bugfix delete mode 100644 changelog.d/3707.misc delete mode 100644 changelog.d/3708.feature delete mode 100644 changelog.d/3709.misc delete mode 100644 changelog.d/3710.bugfix delete mode 100644 changelog.d/3712.misc delete mode 100644 changelog.d/3713.bugfix delete mode 100644 changelog.d/3719.bugfix diff --git a/CHANGES.md b/CHANGES.md index a299110a6b..68ce5d1471 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,67 @@ +Synapse 0.33.3rc1 (2018-08-21) 
+============================== + +Features +-------- + +- Add support for the SNI extension to federation TLS connections ([\#1491](https://github.com/matrix-org/synapse/issues/1491)) +- Add /_media/r0/config ([\#3184](https://github.com/matrix-org/synapse/issues/3184)) +- speed up /members API and add `at` and `membership` params as per MSC1227 ([\#3568](https://github.com/matrix-org/synapse/issues/3568)) +- implement `summary` block in /sync response as per MSC688 ([\#3574](https://github.com/matrix-org/synapse/issues/3574)) +- Add lazy-loading support to /messages as per MSC1227 ([\#3589](https://github.com/matrix-org/synapse/issues/3589)) +- Add ability to limit number of monthly active users on the server ([\#3633](https://github.com/matrix-org/synapse/issues/3633)) +- Support more federation endpoints on workers ([\#3653](https://github.com/matrix-org/synapse/issues/3653)) +- Basic support for room versioning ([\#3654](https://github.com/matrix-org/synapse/issues/3654)) +- Ability to disable client/server Synapse via conf toggle ([\#3655](https://github.com/matrix-org/synapse/issues/3655)) +- Ability to whitelist specific threepids against monthly active user limiting ([\#3662](https://github.com/matrix-org/synapse/issues/3662)) +- Add some metrics for the appservice and federation event sending loops ([\#3664](https://github.com/matrix-org/synapse/issues/3664)) +- Where server is disabled, block ability for locked out users to read new messages ([\#3670](https://github.com/matrix-org/synapse/issues/3670)) +- set admin uri via config, to be used in error messages where the user should contact the administrator ([\#3687](https://github.com/matrix-org/synapse/issues/3687)) +- Synapse's presence functionality can now be disabled with the "use_presence" configuration option. 
([\#3694](https://github.com/matrix-org/synapse/issues/3694)) +- For resource limit blocked users, prevent writing into rooms ([\#3708](https://github.com/matrix-org/synapse/issues/3708)) + + +Bugfixes +-------- + +- Fix occasional glitches in the synapse_event_persisted_position metric ([\#3658](https://github.com/matrix-org/synapse/issues/3658)) +- Fix bug on deleting 3pid when using identity servers that don't support unbind API ([\#3661](https://github.com/matrix-org/synapse/issues/3661)) +- Make the tests pass on Twisted < 18.7.0 ([\#3676](https://github.com/matrix-org/synapse/issues/3676)) +- Don’t ship recaptcha_ajax.js, use it directly from Google ([\#3677](https://github.com/matrix-org/synapse/issues/3677)) +- Fixes test_reap_monthly_active_users so it passes under postgres ([\#3681](https://github.com/matrix-org/synapse/issues/3681)) +- Fix mau blocking calulation bug on login ([\#3689](https://github.com/matrix-org/synapse/issues/3689)) +- Fix missing yield in synapse.storage.monthly_active_users.initialise_reserved_users ([\#3692](https://github.com/matrix-org/synapse/issues/3692)) +- Improve HTTP request logging to include all requests ([\#3700](https://github.com/matrix-org/synapse/issues/3700)) +- Avoid timing out requests while we are streaming back the response ([\#3701](https://github.com/matrix-org/synapse/issues/3701)) +- Support more federation endpoints on workers ([\#3705](https://github.com/matrix-org/synapse/issues/3705), [\#3713](https://github.com/matrix-org/synapse/issues/3713)) +- Fix "Starting db txn 'get_all_updated_receipts' from sentinel context" warning ([\#3710](https://github.com/matrix-org/synapse/issues/3710)) +- Fix bug where `state_cache` cache factor ignored environment variables ([\#3719](https://github.com/matrix-org/synapse/issues/3719)) + + +Deprecations and Removals +------------------------- + +- The Shared-Secret registration method of the legacy v1/register REST endpoint has been removed. For a replacement, please see [the admin/register API documentation](https://github.com/matrix-org/synapse/blob/master/docs/admin_api/register_api.rst). ([\#3703](https://github.com/matrix-org/synapse/issues/3703)) + + +Internal Changes +---------------- + +- The test suite now can run under PostgreSQL. ([\#3423](https://github.com/matrix-org/synapse/issues/3423)) +- Refactor HTTP replication endpoints to reduce code duplication ([\#3632](https://github.com/matrix-org/synapse/issues/3632)) +- Tests now correctly execute on Python 3. ([\#3647](https://github.com/matrix-org/synapse/issues/3647)) +- Sytests can now be run inside a Docker container. ([\#3660](https://github.com/matrix-org/synapse/issues/3660)) +- Port over enough to Python 3 to allow the sytests to start. ([\#3668](https://github.com/matrix-org/synapse/issues/3668)) +- Update docker base image from alpine 3.7 to 3.8. ([\#3669](https://github.com/matrix-org/synapse/issues/3669)) +- Rename synapse.util.async to synapse.util.async_helpers to mitigate async becoming a keyword on Python 3.7. ([\#3678](https://github.com/matrix-org/synapse/issues/3678)) +- Synapse's tests are now formatted with the black autoformatter. ([\#3679](https://github.com/matrix-org/synapse/issues/3679)) +- Implemented a new testing base class to reduce test boilerplate. 
([\#3684](https://github.com/matrix-org/synapse/issues/3684)) +- Rename MAU prometheus metrics ([\#3690](https://github.com/matrix-org/synapse/issues/3690)) +- add new error type ResourceLimit ([\#3707](https://github.com/matrix-org/synapse/issues/3707)) +- Logcontexts for replication command handlers ([\#3709](https://github.com/matrix-org/synapse/issues/3709)) +- Update admin register API documentation to reference a real user ID. ([\#3712](https://github.com/matrix-org/synapse/issues/3712)) + + Synapse 0.33.2 (2018-08-09) =========================== diff --git a/changelog.d/1491.feature b/changelog.d/1491.feature deleted file mode 100644 index 77b6d6ca09..0000000000 --- a/changelog.d/1491.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for the SNI extension to federation TLS connections \ No newline at end of file diff --git a/changelog.d/3184.feature b/changelog.d/3184.feature deleted file mode 100644 index 9f746a57a0..0000000000 --- a/changelog.d/3184.feature +++ /dev/null @@ -1 +0,0 @@ -Add /_media/r0/config diff --git a/changelog.d/3423.misc b/changelog.d/3423.misc deleted file mode 100644 index 51768c6d14..0000000000 --- a/changelog.d/3423.misc +++ /dev/null @@ -1 +0,0 @@ -The test suite now can run under PostgreSQL. diff --git a/changelog.d/3568.feature b/changelog.d/3568.feature deleted file mode 100644 index 247f02ba4e..0000000000 --- a/changelog.d/3568.feature +++ /dev/null @@ -1 +0,0 @@ -speed up /members API and add `at` and `membership` params as per MSC1227 diff --git a/changelog.d/3574.feature b/changelog.d/3574.feature deleted file mode 100644 index 87ac32df72..0000000000 --- a/changelog.d/3574.feature +++ /dev/null @@ -1 +0,0 @@ -implement `summary` block in /sync response as per MSC688 diff --git a/changelog.d/3589.feature b/changelog.d/3589.feature deleted file mode 100644 index a8d7124719..0000000000 --- a/changelog.d/3589.feature +++ /dev/null @@ -1 +0,0 @@ -Add lazy-loading support to /messages as per MSC1227 diff --git a/changelog.d/3632.misc b/changelog.d/3632.misc deleted file mode 100644 index 9d64bbe83b..0000000000 --- a/changelog.d/3632.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor HTTP replication endpoints to reduce code duplication diff --git a/changelog.d/3633.feature b/changelog.d/3633.feature deleted file mode 100644 index 8007a04840..0000000000 --- a/changelog.d/3633.feature +++ /dev/null @@ -1 +0,0 @@ -Add ability to limit number of monthly active users on the server diff --git a/changelog.d/3647.misc b/changelog.d/3647.misc deleted file mode 100644 index dbc66dae60..0000000000 --- a/changelog.d/3647.misc +++ /dev/null @@ -1 +0,0 @@ -Tests now correctly execute on Python 3. 
diff --git a/changelog.d/3653.feature b/changelog.d/3653.feature deleted file mode 100644 index 6c5422994f..0000000000 --- a/changelog.d/3653.feature +++ /dev/null @@ -1 +0,0 @@ -Support more federation endpoints on workers diff --git a/changelog.d/3654.feature b/changelog.d/3654.feature deleted file mode 100644 index 35c95580bc..0000000000 --- a/changelog.d/3654.feature +++ /dev/null @@ -1 +0,0 @@ -Basic support for room versioning diff --git a/changelog.d/3655.feature b/changelog.d/3655.feature deleted file mode 100644 index 1134e549e7..0000000000 --- a/changelog.d/3655.feature +++ /dev/null @@ -1 +0,0 @@ -Ability to disable client/server Synapse via conf toggle diff --git a/changelog.d/3658.bugfix b/changelog.d/3658.bugfix deleted file mode 100644 index 556011a150..0000000000 --- a/changelog.d/3658.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix occasional glitches in the synapse_event_persisted_position metric diff --git a/changelog.d/3660.misc b/changelog.d/3660.misc deleted file mode 100644 index acd814c273..0000000000 --- a/changelog.d/3660.misc +++ /dev/null @@ -1 +0,0 @@ -Sytests can now be run inside a Docker container. diff --git a/changelog.d/3661.bugfix b/changelog.d/3661.bugfix deleted file mode 100644 index f2b4703d80..0000000000 --- a/changelog.d/3661.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug on deleting 3pid when using identity servers that don't support unbind API diff --git a/changelog.d/3662.feature b/changelog.d/3662.feature deleted file mode 100644 index daacef086d..0000000000 --- a/changelog.d/3662.feature +++ /dev/null @@ -1 +0,0 @@ -Ability to whitelist specific threepids against monthly active user limiting diff --git a/changelog.d/3664.feature b/changelog.d/3664.feature deleted file mode 100644 index 184dde9939..0000000000 --- a/changelog.d/3664.feature +++ /dev/null @@ -1 +0,0 @@ -Add some metrics for the appservice and federation event sending loops diff --git a/changelog.d/3668.misc b/changelog.d/3668.misc deleted file mode 100644 index f9265addf1..0000000000 --- a/changelog.d/3668.misc +++ /dev/null @@ -1 +0,0 @@ -Port over enough to Python 3 to allow the sytests to start. diff --git a/changelog.d/3669.misc b/changelog.d/3669.misc deleted file mode 100644 index fc579ddc60..0000000000 --- a/changelog.d/3669.misc +++ /dev/null @@ -1 +0,0 @@ -Update docker base image from alpine 3.7 to 3.8. diff --git a/changelog.d/3670.feature b/changelog.d/3670.feature deleted file mode 100644 index ba00f2d2ec..0000000000 --- a/changelog.d/3670.feature +++ /dev/null @@ -1 +0,0 @@ -Where server is disabled, block ability for locked out users to read new messages diff --git a/changelog.d/3676.bugfix b/changelog.d/3676.bugfix deleted file mode 100644 index 7b23a2773a..0000000000 --- a/changelog.d/3676.bugfix +++ /dev/null @@ -1 +0,0 @@ -Make the tests pass on Twisted < 18.7.0 diff --git a/changelog.d/3677.bugfix b/changelog.d/3677.bugfix deleted file mode 100644 index caa551627b..0000000000 --- a/changelog.d/3677.bugfix +++ /dev/null @@ -1 +0,0 @@ -Don’t ship recaptcha_ajax.js, use it directly from Google diff --git a/changelog.d/3678.misc b/changelog.d/3678.misc deleted file mode 100644 index 0d7c8da64a..0000000000 --- a/changelog.d/3678.misc +++ /dev/null @@ -1 +0,0 @@ -Rename synapse.util.async to synapse.util.async_helpers to mitigate async becoming a keyword on Python 3.7. 
diff --git a/changelog.d/3679.misc b/changelog.d/3679.misc deleted file mode 100644 index 1de0a0f2b4..0000000000 --- a/changelog.d/3679.misc +++ /dev/null @@ -1 +0,0 @@ -Synapse's tests are now formatted with the black autoformatter. diff --git a/changelog.d/3681.bugfix b/changelog.d/3681.bugfix deleted file mode 100644 index d18a69cd0c..0000000000 --- a/changelog.d/3681.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fixes test_reap_monthly_active_users so it passes under postgres diff --git a/changelog.d/3684.misc b/changelog.d/3684.misc deleted file mode 100644 index 4c013263c4..0000000000 --- a/changelog.d/3684.misc +++ /dev/null @@ -1 +0,0 @@ -Implemented a new testing base class to reduce test boilerplate. diff --git a/changelog.d/3687.feature b/changelog.d/3687.feature deleted file mode 100644 index 64b89f6411..0000000000 --- a/changelog.d/3687.feature +++ /dev/null @@ -1 +0,0 @@ -set admin uri via config, to be used in error messages where the user should contact the administrator diff --git a/changelog.d/3689.bugfix b/changelog.d/3689.bugfix deleted file mode 100644 index 934d039836..0000000000 --- a/changelog.d/3689.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix mau blocking calulation bug on login diff --git a/changelog.d/3690.misc b/changelog.d/3690.misc deleted file mode 100644 index 710add0243..0000000000 --- a/changelog.d/3690.misc +++ /dev/null @@ -1 +0,0 @@ -Rename MAU prometheus metrics diff --git a/changelog.d/3692.bugfix b/changelog.d/3692.bugfix deleted file mode 100644 index f44e13dca1..0000000000 --- a/changelog.d/3692.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix missing yield in synapse.storage.monthly_active_users.initialise_reserved_users diff --git a/changelog.d/3694.feature b/changelog.d/3694.feature deleted file mode 100644 index 916a342ff4..0000000000 --- a/changelog.d/3694.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse's presence functionality can now be disabled with the "use_presence" configuration option. diff --git a/changelog.d/3700.bugfix b/changelog.d/3700.bugfix deleted file mode 100644 index 492cce1dfc..0000000000 --- a/changelog.d/3700.bugfix +++ /dev/null @@ -1 +0,0 @@ -Improve HTTP request logging to include all requests \ No newline at end of file diff --git a/changelog.d/3701.bugfix b/changelog.d/3701.bugfix deleted file mode 100644 index c22de34537..0000000000 --- a/changelog.d/3701.bugfix +++ /dev/null @@ -1 +0,0 @@ -Avoid timing out requests while we are streaming back the response diff --git a/changelog.d/3703.removal b/changelog.d/3703.removal deleted file mode 100644 index d13ce5adb4..0000000000 --- a/changelog.d/3703.removal +++ /dev/null @@ -1 +0,0 @@ -The Shared-Secret registration method of the legacy v1/register REST endpoint has been removed. For a replacement, please see [the admin/register API documentation](https://github.com/matrix-org/synapse/blob/master/docs/admin_api/register_api.rst). 
diff --git a/changelog.d/3705.bugfix b/changelog.d/3705.bugfix deleted file mode 100644 index 6c5422994f..0000000000 --- a/changelog.d/3705.bugfix +++ /dev/null @@ -1 +0,0 @@ -Support more federation endpoints on workers diff --git a/changelog.d/3707.misc b/changelog.d/3707.misc deleted file mode 100644 index 8123ca6543..0000000000 --- a/changelog.d/3707.misc +++ /dev/null @@ -1 +0,0 @@ -add new error type ResourceLimit diff --git a/changelog.d/3708.feature b/changelog.d/3708.feature deleted file mode 100644 index 2f146ba62b..0000000000 --- a/changelog.d/3708.feature +++ /dev/null @@ -1 +0,0 @@ -For resource limit blocked users, prevent writing into rooms diff --git a/changelog.d/3709.misc b/changelog.d/3709.misc deleted file mode 100644 index bbda357d44..0000000000 --- a/changelog.d/3709.misc +++ /dev/null @@ -1 +0,0 @@ -Logcontexts for replication command handlers diff --git a/changelog.d/3710.bugfix b/changelog.d/3710.bugfix deleted file mode 100644 index c28e177667..0000000000 --- a/changelog.d/3710.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix "Starting db txn 'get_all_updated_receipts' from sentinel context" warning \ No newline at end of file diff --git a/changelog.d/3712.misc b/changelog.d/3712.misc deleted file mode 100644 index 30f8c2af21..0000000000 --- a/changelog.d/3712.misc +++ /dev/null @@ -1 +0,0 @@ -Update admin register API documentation to reference a real user ID. diff --git a/changelog.d/3713.bugfix b/changelog.d/3713.bugfix deleted file mode 100644 index 6c5422994f..0000000000 --- a/changelog.d/3713.bugfix +++ /dev/null @@ -1 +0,0 @@ -Support more federation endpoints on workers diff --git a/changelog.d/3719.bugfix b/changelog.d/3719.bugfix deleted file mode 100644 index 095d9288e8..0000000000 --- a/changelog.d/3719.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where `state_cache` cache factor ignored environment variables -- cgit 1.5.1 From 1058d141272f17435b3548a4b12abb51a6372383 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 Aug 2018 17:16:58 +0100 Subject: Make the in flight background process metrics thread safe --- synapse/metrics/background_process_metrics.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index ce678d5f75..264fb93892 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -14,6 +14,7 @@ # limitations under the License. import six +import threading from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily @@ -78,6 +79,9 @@ _background_process_counts = dict() # type: dict[str, int] # of process descriptions that no longer have any active processes. _background_processes = dict() # type: dict[str, set[_BackgroundProcess]] +# A lock that covers the above dicts +_bg_metrics_lock = threading.Lock() + class _Collector(object): """A custom metrics collector for the background process metrics. 
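The locking scheme introduced above follows a standard recipe for sharing mutable dicts between the reactor thread and the Prometheus collector thread. A rough sketch under illustrative names (not the actual synapse module layout): writers mutate under the lock, and readers take a shallow copy under the same lock before iterating:

    import threading

    # Illustrative shared registry plus the lock that guards it.
    _processes_by_desc = {}
    _registry_lock = threading.Lock()

    def register(desc, proc):
        with _registry_lock:
            _processes_by_desc.setdefault(desc, set()).add(proc)

    def unregister(desc, proc):
        with _registry_lock:
            _processes_by_desc[desc].discard(proc)

    def collect():
        # Copy under the lock so the dict cannot change size while the
        # metrics thread walks it; iterate the copy outside the lock.
        with _registry_lock:
            snapshot = dict(_processes_by_desc)
        for desc, procs in snapshot.items():
            yield desc, len(procs)

The copy is cheap relative to a metrics scrape, and it keeps the lock hold time short so the reactor thread is never blocked for long.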
@@ -92,7 +96,11 @@ class _Collector(object): labels=["name"], ) - for desc, processes in six.iteritems(_background_processes): + # We copy the dict so that it doesn't change from underneath us + with _bg_metrics_lock: + _background_processes_copy = dict(_background_processes) + + for desc, processes in six.iteritems(_background_processes_copy): background_process_in_flight_count.add_metric( (desc,), len(processes), ) @@ -167,19 +175,26 @@ def run_as_background_process(desc, func, *args, **kwargs): """ @defer.inlineCallbacks def run(): - count = _background_process_counts.get(desc, 0) - _background_process_counts[desc] = count + 1 + with _bg_metrics_lock: + count = _background_process_counts.get(desc, 0) + _background_process_counts[desc] = count + 1 + _background_process_start_count.labels(desc).inc() with LoggingContext(desc) as context: context.request = "%s-%i" % (desc, count) proc = _BackgroundProcess(desc, context) - _background_processes.setdefault(desc, set()).add(proc) + + with _bg_metrics_lock: + _background_processes.setdefault(desc, set()).add(proc) + try: yield func(*args, **kwargs) finally: proc.update_metrics() - _background_processes[desc].remove(proc) + + with _bg_metrics_lock: + _background_processes[desc].remove(proc) with PreserveLoggingContext(): return run() -- cgit 1.5.1 From b01a75549893bf4f8f986a74a499e7cb6131868c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 Aug 2018 17:27:52 +0100 Subject: Make the in flight requests metrics thread safe --- synapse/http/request_metrics.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 588e280571..284d5e05de 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +import threading from prometheus_client.core import Counter, Histogram @@ -111,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter( # The set of all in flight requests, set[RequestMetrics] _in_flight_requests = set() +# Protects the _in_flight_requests set from concurrent accesss +_in_flight_reqeuests_lock = threading.Lock() + def _get_in_flight_counts(): """Returns a count of all in flight requests by (method, server_name) @@ -120,7 +124,8 @@ def _get_in_flight_counts(): """ # Cast to a list to prevent it changing while the Prometheus # thread is collecting metrics - reqs = list(_in_flight_requests) + with _in_flight_reqeuests_lock: + reqs = list(_in_flight_requests) for rm in reqs: rm.update_metrics() @@ -154,10 +159,12 @@ class RequestMetrics(object): # to the "in flight" metrics. 
self._request_stats = self.start_context.get_resource_usage() - _in_flight_requests.add(self) + with _in_flight_reqeuests_lock: + _in_flight_requests.add(self) def stop(self, time_sec, request): - _in_flight_requests.discard(self) + with _in_flight_reqeuests_lock: + _in_flight_requests.discard(self) context = LoggingContext.current_context() -- cgit 1.5.1 From e2c0aa2c2619d6db7f6d6b1413430133e365f4cd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 20 Aug 2018 17:40:59 +0100 Subject: Newsfile --- changelog.d/3722.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3722.bugfix diff --git a/changelog.d/3722.bugfix b/changelog.d/3722.bugfix new file mode 100644 index 0000000000..16cbaf76cb --- /dev/null +++ b/changelog.d/3722.bugfix @@ -0,0 +1 @@ +Fix error collecting prometheus metrics when run on dedicated thread due to threading concurrency issues -- cgit 1.5.1 From 55e6bdf28798153de5c2a6cf9bf0a6618f59168a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 20 Aug 2018 18:20:07 +0100 Subject: Robustness fix for logcontext filter Make the logcontext filter not explode if it somehow ends up with a logcontext of None, since that infinite-loops the whole logging system. --- synapse/util/logcontext.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 07e83fadda..848765e530 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -385,7 +385,13 @@ class LoggingContextFilter(logging.Filter): context = LoggingContext.current_context() for key, value in self.defaults.items(): setattr(record, key, value) - context.copy_to(record) + + # context should never be None, but if it somehow ends up being, then + # we end up in a death spiral of infinite loops, so let's check, for + # robustness' sake. + if context is not None: + context.copy_to(record) + return True -- cgit 1.5.1 From be6527325a829e7d5e4540d7a1983a6e6fda2c0f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 20 Aug 2018 18:21:10 +0100 Subject: Fix exceptions when a connection is closed before we read the headers This fixes bugs introduced in #3700, by making sure that we behave sanely when an incoming connection is closed before the headers are read. --- synapse/http/site.py | 8 +++++++- synapse/util/logcontext.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index ad2a98468e..88ed3714f9 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -182,7 +182,7 @@ class SynapseRequest(Request): # the client disconnects. with PreserveLoggingContext(self.logcontext): logger.warn( - "Error processing request: %s %s", reason.type, reason.value, + "Error processing request %r: %s %s", self, reason.type, reason.value, ) if not self._is_processing: @@ -219,6 +219,12 @@ class SynapseRequest(Request): """Log the completion of this request and update the metrics """ + if self.logcontext is None: + # this can happen if the connection closed before we read the + # headers (so render was never called). In that case we'll already + # have logged a warning, so just bail out. 
+ return + usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 848765e530..a0c2d37610 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -402,7 +402,9 @@ class PreserveLoggingContext(object): __slots__ = ["current_context", "new_context", "has_parent"] - def __init__(self, new_context=LoggingContext.sentinel): + def __init__(self, new_context=None): + if new_context is None: + new_context = LoggingContext.sentinel self.new_context = new_context def __enter__(self): -- cgit 1.5.1 From 012d612f9d3c8ca58538fffe4207309bf1644cd8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 20 Aug 2018 18:26:27 +0100 Subject: changelog --- changelog.d/3723.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3723.bugfix diff --git a/changelog.d/3723.bugfix b/changelog.d/3723.bugfix new file mode 100644 index 0000000000..7c21083691 --- /dev/null +++ b/changelog.d/3723.bugfix @@ -0,0 +1 @@ +Fix bug in v0.33.3rc1 which caused infinite loops and OOMs -- cgit 1.5.1 From 3b5b64ac99c1a253cc24b6a1063247115ba18cd3 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 21 Aug 2018 03:48:55 +1000 Subject: changelog --- CHANGES.md | 9 +++++++++ changelog.d/3723.bugfix | 1 - synapse/__init__.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/3723.bugfix diff --git a/CHANGES.md b/CHANGES.md index 68ce5d1471..df01178971 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 0.33.3rc2 (2018-08-21) +============================== + +Bugfixes +-------- + +- Fix bug in v0.33.3rc1 which caused infinite loops and OOMs ([\#3723](https://github.com/matrix-org/synapse/issues/3723)) + + Synapse 0.33.3rc1 (2018-08-21) ============================== diff --git a/changelog.d/3723.bugfix b/changelog.d/3723.bugfix deleted file mode 100644 index 7c21083691..0000000000 --- a/changelog.d/3723.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug in v0.33.3rc1 which caused infinite loops and OOMs diff --git a/synapse/__init__.py b/synapse/__init__.py index 5175a1161d..252c49ca82 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.33.3rc1" +__version__ = "0.33.3rc2" -- cgit 1.5.1 From 3f6762f0bbeb611714ba6c9c4329ea7d5e46cbda Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 21 Aug 2018 09:38:38 +0100 Subject: isort --- synapse/metrics/background_process_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 264fb93892..167167be0a 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six import threading +import six + from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily from twisted.internet import defer -- cgit 1.5.1 From 808d8e06aa6a3bdd9016f82d98088111e5741f4c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 21 Aug 2018 11:09:46 +0100 Subject: Don't log exceptions when failing to fetch server keys Not being able to resolve or connect to remote servers is an expected error, so we shouldn't log at ERROR with stacktraces. 
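In outline, the pattern is the one sketched below, where `fetch` is a hypothetical stand-in for the actual key request (not synapse's API): expected network failures produce a one-line warning, while anything unexpected keeps its full traceback:

    import logging

    from twisted.internet.error import ConnectError
    from twisted.names.error import DomainError

    logger = logging.getLogger(__name__)

    def get_server_key(server_name, fetch):
        # fetch is a hypothetical callable that performs the remote request.
        try:
            return fetch(server_name)
        except (ConnectError, DomainError) as e:
            # Unreachable or unresolvable servers are routine, so log a
            # single warning line rather than a stack trace.
            logger.warn("Error getting key for %r: %s", server_name, e)
        except Exception:
            # Anything else is genuinely unexpected: keep the traceback.
            logger.exception("Error getting key for %r", server_name)
        raise IOError("Cannot get key for %r" % server_name)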
--- synapse/crypto/keyclient.py | 8 ++++++-- synapse/federation/transport/server.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index c20a32096a..e94400b8e2 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -18,7 +18,9 @@ import logging from canonicaljson import json from twisted.internet import defer, reactor +from twisted.internet.error import ConnectError from twisted.internet.protocol import Factory +from twisted.names.error import DomainError from twisted.web.http import HTTPClient from synapse.http.endpoint import matrix_federation_endpoint @@ -47,12 +49,14 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): server_response, server_certificate = yield protocol.remote_key defer.returnValue((server_response, server_certificate)) except SynapseKeyClientError as e: - logger.exception("Error getting key for %r" % (server_name,)) + logger.warn("Error getting key for %r: %s", server_name, e) if e.status.startswith("4"): # Don't retry for 4xx responses. raise IOError("Cannot get key for %r" % server_name) + except (ConnectError, DomainError) as e: + logger.warn("Error getting key for %r: %s", server_name, e) except Exception as e: - logger.exception(e) + logger.exception("Error getting key for %r", server_name) raise IOError("Cannot get key for %r" % server_name) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 77969a4f38..33a37d4498 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -261,10 +261,10 @@ class BaseFederationServlet(object): except NoAuthenticationError: origin = None if self.REQUIRE_AUTH: - logger.exception("authenticate_request failed") + logger.warn("authenticate_request failed") raise except Exception: - logger.exception("authenticate_request failed") + logger.warn("authenticate_request failed") raise if origin: -- cgit 1.5.1 From 79d3b4689e1e9fa5304beb91c6b738f9131b6aa8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 21 Aug 2018 11:21:48 +0100 Subject: Newsfile --- changelog.d/3727.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3727.misc diff --git a/changelog.d/3727.misc b/changelog.d/3727.misc new file mode 100644 index 0000000000..0b83220d90 --- /dev/null +++ b/changelog.d/3727.misc @@ -0,0 +1 @@ +Log failure to authenticate remote servers as warnings (without stack traces) -- cgit 1.5.1 From c2c153dd3b97ec98ec7ef295deeb466d72027a01 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 21 Aug 2018 11:41:07 +0100 Subject: Log more detail when we fail to authenticate request --- synapse/federation/transport/server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 33a37d4498..7a993fd1cf 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -261,10 +261,10 @@ class BaseFederationServlet(object): except NoAuthenticationError: origin = None if self.REQUIRE_AUTH: - logger.warn("authenticate_request failed") + logger.warn("authenticate_request failed: missing authentication") raise - except Exception: - logger.warn("authenticate_request failed") + except Exception as e: + logger.warn("authenticate_request failed: %s", e) raise if origin: -- cgit 1.5.1 From cd6937fb266b72080f73a7a41da761f7f8775d96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: 
Tue, 21 Aug 2018 16:28:10 +0100 Subject: Fix typo --- synapse/http/request_metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 284d5e05de..72c2654678 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -113,7 +113,7 @@ in_flight_requests_db_sched_duration = Counter( _in_flight_requests = set() # Protects the _in_flight_requests set from concurrent accesss -_in_flight_reqeuests_lock = threading.Lock() +_in_flight_requests_lock = threading.Lock() def _get_in_flight_counts(): @@ -124,7 +124,7 @@ def _get_in_flight_counts(): """ # Cast to a list to prevent it changing while the Prometheus # thread is collecting metrics - with _in_flight_reqeuests_lock: + with _in_flight_requests_lock: reqs = list(_in_flight_requests) for rm in reqs: @@ -159,11 +159,11 @@ class RequestMetrics(object): # to the "in flight" metrics. self._request_stats = self.start_context.get_resource_usage() - with _in_flight_reqeuests_lock: + with _in_flight_requests_lock: _in_flight_requests.add(self) def stop(self, time_sec, request): - with _in_flight_reqeuests_lock: + with _in_flight_requests_lock: _in_flight_requests.discard(self) context = LoggingContext.current_context() -- cgit 1.5.1 From f7baff6f7b4af039254ec16e9272b90adb58dab3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 Aug 2018 22:41:41 +0100 Subject: Fix 500 error from /consent form Fixes #3731 --- synapse/rest/consent/consent_resource.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 147ff7d79b..52954ada12 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -175,7 +175,7 @@ class ConsentResource(Resource): """ version = parse_string(request, "v", required=True) username = parse_string(request, "u", required=True) - userhmac = parse_string(request, "h", required=True) + userhmac = parse_string(request, "h", required=True, encoding=None) self._check_hash(username, userhmac) @@ -210,9 +210,18 @@ class ConsentResource(Resource): finish_request(request) def _check_hash(self, userid, userhmac): + """ + Args: + userid (unicode): + userhmac (bytes): + + Raises: + SynapseError if the hash doesn't match + + """ want_mac = hmac.new( key=self._hmac_secret, - msg=userid, + msg=userid.encode('utf-8'), digestmod=sha256, ).hexdigest() -- cgit 1.5.1 From f7bf181a909384b649d2b615569921ae6add0505 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 Aug 2018 23:14:25 +0100 Subject: fix another consent encoding fail --- synapse/rest/consent/consent_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 52954ada12..7362e1858d 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -140,7 +140,7 @@ class ConsentResource(Resource): version = parse_string(request, "v", default=self._default_consent_version) username = parse_string(request, "u", required=True) - userhmac = parse_string(request, "h", required=True) + userhmac = parse_string(request, "h", required=True, encoding=None) self._check_hash(username, userhmac) -- cgit 1.5.1 From afb4b490a417aec9223cfd1dcf5aa36c0fc5a6d1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 Aug 2018 23:19:14 +0100 Subject: changelog --- 
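For reference, the consent fix above boils down to encoding the unicode user ID before handing it to hmac.new(), which requires bytes on Python 3. A minimal sketch of the check (the secret and user ID are made up for illustration):

    import hmac
    from hashlib import sha256

    def check_hash(hmac_secret, userid, userhmac):
        # hmac_secret and userhmac are bytes; userid is a unicode string.
        want_mac = hmac.new(
            key=hmac_secret,
            msg=userid.encode('utf-8'),  # encode first: hmac.new() needs bytes
            digestmod=sha256,
        ).hexdigest()
        if want_mac.encode('ascii') != userhmac:
            raise ValueError("HMAC mismatch")

    secret = b"notasecret"
    mac = hmac.new(key=secret, msg=b"@alice:example.com", digestmod=sha256)
    check_hash(secret, u"@alice:example.com", mac.hexdigest().encode('ascii'))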
changelog.d/3732.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3732.bugfix diff --git a/changelog.d/3732.bugfix b/changelog.d/3732.bugfix new file mode 100644 index 0000000000..638b6334b0 --- /dev/null +++ b/changelog.d/3732.bugfix @@ -0,0 +1 @@ +Fix bug introduced in v0.33.3rc1 which made the ToS give a 500 error \ No newline at end of file -- cgit 1.5.1 From bb81e78ec6c05edc95b25a954bdf1cf688d4d652 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 22 Aug 2018 00:56:37 +0200 Subject: Split the state_group_cache in two (#3726) Splits the state_group_cache in two. One half contains normal state events; the other contains member events. The idea is that the lazyloading common case of: "I want a subset of member events plus all of the other state" can be accomplished efficiently by splitting the cache into two, and asking for "all events" from the non-members cache, and "just these keys" from the members cache. This means we can avoid having to make DictionaryCache aware of these sort of complicated queries, whilst letting LL requests benefit from the caching. Previously we were unable to sensibly use the caching and had to pull all state from the DB irrespective of the filtering, which made things slow. Hopefully fixes https://github.com/matrix-org/synapse/issues/3720. --- changelog.d/3726.misc | 1 + synapse/storage/state.py | 158 +++++++++++++++++++++++++++++++++++++++----- tests/storage/test_state.py | 105 ++++++++++++++++++++++++++--- 3 files changed, 236 insertions(+), 28 deletions(-) create mode 100644 changelog.d/3726.misc diff --git a/changelog.d/3726.misc b/changelog.d/3726.misc new file mode 100644 index 0000000000..c4f66ec998 --- /dev/null +++ b/changelog.d/3726.misc @@ -0,0 +1 @@ +Split the state_group_cache into member and non-member state events (and so speed up LL /sync) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index dd03c4168b..4b971efdba 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -60,8 +60,43 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): def __init__(self, db_conn, hs): super(StateGroupWorkerStore, self).__init__(db_conn, hs) + # Originally the state store used a single DictionaryCache to cache the + # event IDs for the state types in a given state group to avoid hammering + # on the state_group* tables. + # + # The point of using a DictionaryCache is that it can cache a subset + # of the state events for a given state group (i.e. a subset of the keys for a + # given dict which is an entry in the cache for a given state group ID). + # + # However, this poses problems when performing complicated queries + # on the store - for instance: "give me all the state for this group, but + # limit members to this subset of users", as DictionaryCache's API isn't + # rich enough to say "please cache any of these fields, apart from this subset". + # This is problematic when lazy loading members, which requires this behaviour, + # as without it the cache has no choice but to speculatively load all + # state events for the group, which negates the efficiency being sought. + # + # Rather than overcomplicating DictionaryCache's API, we instead split the + # state_group_cache into two halves - one for tracking non-member events, + # and the other for tracking member_events. This means that lazy loading + # queries can be made in a cache-friendly manner by querying both caches + # separately and then merging the result. 
So for the example above, you + # would query the members cache for a specific subset of state keys + # (which DictionaryCache will handle efficiently and fine) and the non-members + # cache for all state (which DictionaryCache will similarly handle fine) + # and then just merge the results together. + # + # We size the non-members cache to be smaller than the members cache as the + # vast majority of state in Matrix (today) is member events. + self._state_group_cache = DictionaryCache( - "*stateGroupCache*", 500000 * get_cache_factor_for("stateGroupCache") + "*stateGroupCache*", + # TODO: this hasn't been tuned yet + 50000 * get_cache_factor_for("stateGroupCache") + ) + self._state_group_members_cache = DictionaryCache( + "*stateGroupMembersCache*", + 500000 * get_cache_factor_for("stateGroupMembersCache") ) @defer.inlineCallbacks @@ -275,7 +310,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, members=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -284,6 +319,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. + members (bool|None): If not None, then, in addition to any filtering + implied by types, the results are also filtered to only include + member events (if True), or to exclude member events (if False) Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -294,14 +332,14 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, members, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, + self, txn, groups, types=None, members=None, ): results = {group: {} for group in groups} @@ -339,6 +377,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): %s """) + if members is True: + sql += " AND type = '%s'" % (EventTypes.Member,) + elif members is False: + sql += " AND type <> '%s'" % (EventTypes.Member,) + # Turns out that postgres doesn't like doing a list of OR's and # is about 1000x slower, so we just issue a query for each specific # type seperately. @@ -386,6 +429,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): else: where_clause = "" + if members is True: + where_clause += " AND type = '%s'" % EventTypes.Member + elif members is False: + where_clause += " AND type <> '%s'" % EventTypes.Member + # We don't use WITH RECURSIVE on sqlite3 as there are distributions # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) for group in groups: @@ -580,10 +628,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types, filtered_types=None): + def _get_some_state_from_cache(self, cache, group, types, filtered_types=None): """Checks if group is in cache. 
See `_get_state_for_groups` Args: + cache(DictionaryCache): the state group cache to use group(int): The state group to lookup types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all @@ -597,11 +646,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): requests state from the cache, if False we need to query the DB for the missing state. """ - is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) + is_all, known_absent, state_dict_ids = cache.get(group) type_to_key = {} - # tracks whether any of ourrequested types are missing from the cache + # tracks whether any of our requested types are missing from the cache missing_types = False for typ, state_key in types: @@ -648,7 +697,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): if include(k[0], k[1]) }, got_all - def _get_all_state_from_cache(self, group): + def _get_all_state_from_cache(self, cache, group): """Checks if group is in cache. See `_get_state_for_groups` Returns 2-tuple (`state_dict`, `got_all`). `got_all` is a bool @@ -656,9 +705,10 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): cache, if False we need to query the DB for the missing state. Args: + cache(DictionaryCache): the state group cache to use group: The state group to lookup """ - is_all, _, state_dict_ids = self._state_group_cache.get(group) + is_all, _, state_dict_ids = cache.get(group) return state_dict_ids, is_all @@ -681,6 +731,62 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. + Returns: + Deferred[dict[int, dict[(type, state_key), EventBase]]] + a dictionary mapping from state group to state dictionary. + """ + if types is not None: + non_member_types = [t for t in types if t[0] != EventTypes.Member] + + if filtered_types is not None and EventTypes.Member not in filtered_types: + # we want all of the membership events + member_types = None + else: + member_types = [t for t in types if t[0] == EventTypes.Member] + + else: + non_member_types = None + member_types = None + + non_member_state = yield self._get_state_for_groups_using_cache( + groups, self._state_group_cache, non_member_types, filtered_types, + ) + # XXX: we could skip this entirely if member_types is [] + member_state = yield self._get_state_for_groups_using_cache( + # we set filtered_types=None as member_state only ever contain members. + groups, self._state_group_members_cache, member_types, None, + ) + + state = non_member_state + for group in groups: + state[group].update(member_state[group]) + + defer.returnValue(state) + + @defer.inlineCallbacks + def _get_state_for_groups_using_cache( + self, groups, cache, types=None, filtered_types=None + ): + """Gets the state at each of a list of state groups, optionally + filtering by type/state_key, querying from a specific cache. + + Args: + groups (iterable[int]): list of state groups for which we want + to get the state. + cache (DictionaryCache): the cache of group ids to state dicts which + we will pass through - either the normal state cache or the specific + members state cache. + types (None|iterable[(str, None|str)]): + indicates the state type/keys required. If None, the whole + state is fetched and returned. + + Otherwise, each entry should be a `(type, state_key)` tuple to + include in the response. 
A `state_key` of None is a wildcard + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. + Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] a dictionary mapping from state group to state dictionary. @@ -692,7 +798,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, got_all = self._get_some_state_from_cache( - group, types, filtered_types + cache, group, types, filtered_types ) results[group] = state_dict_ids @@ -701,7 +807,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): else: for group in set(groups): state_dict_ids, got_all = self._get_all_state_from_cache( - group + cache, group ) results[group] = state_dict_ids @@ -710,8 +816,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): missing_groups.append(group) if missing_groups: - # Okay, so we have some missing_types, lets fetch them. - cache_seq_num = self._state_group_cache.sequence + # Okay, so we have some missing_types, let's fetch them. + cache_seq_num = cache.sequence # the DictionaryCache knows if it has *all* the state, but # does not know if it has all of the keys of a particular type, @@ -725,7 +831,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch + missing_groups, types_to_fetch, cache == self._state_group_members_cache, ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -745,7 +851,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # update the cache with all the things we fetched from the # database. - self._state_group_cache.update( + cache.update( cache_seq_num, key=group, value=group_state_dict, @@ -847,15 +953,33 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): ], ) - # Prefill the state group cache with this group. + # Prefill the state group caches with this group. # It's fine to use the sequence like this as the state group map # is immutable. 
(If the map wasn't immutable then this prefill could # race with another update) + + current_member_state_ids = { + s: ev + for (s, ev) in iteritems(current_state_ids) + if s[0] == EventTypes.Member + } + txn.call_after( + self._state_group_members_cache.update, + self._state_group_members_cache.sequence, + key=state_group, + value=dict(current_member_state_ids), + ) + + current_non_member_state_ids = { + s: ev + for (s, ev) in iteritems(current_state_ids) + if s[0] != EventTypes.Member + } txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, key=state_group, - value=dict(current_state_ids), + value=dict(current_non_member_state_ids), ) return state_group diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index ebfd969b36..d717b9f94e 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -185,6 +185,7 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters out members with types=[] (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [], filtered_types=[EventTypes.Member] ) @@ -197,8 +198,20 @@ class StateStoreTestCase(tests.unittest.TestCase): state_dict, ) + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual( + {}, + state_dict, + ) + # test _get_some_state_from_cache correctly filters in members with wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -207,6 +220,18 @@ class StateStoreTestCase(tests.unittest.TestCase): { (e1.type, e1.state_key): e1.event_id, (e2.type, e2.state_key): e2.event_id, + }, + state_dict, + ) + + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual( + { (e3.type, e3.state_key): e3.event_id, # e4 is overwritten by e5 (e5.type, e5.state_key): e5.event_id, @@ -216,6 +241,7 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member], @@ -226,6 +252,20 @@ class StateStoreTestCase(tests.unittest.TestCase): { (e1.type, e1.state_key): e1.event_id, (e2.type, e2.state_key): e2.event_id, + }, + state_dict, + ) + + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, + [(EventTypes.Member, e5.state_key)], + filtered_types=[EventTypes.Member], + ) + + self.assertEqual(is_all, True) + self.assertDictEqual( + { (e5.type, e5.state_key): e5.event_id, }, state_dict, @@ -234,6 +274,7 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, group, [(EventTypes.Member, e5.state_key)], filtered_types=None ) @@ -254,9 +295,6 @@ class StateStoreTestCase(tests.unittest.TestCase): { (e1.type, 
e1.state_key): e1.event_id, (e2.type, e2.state_key): e2.event_id, - (e3.type, e3.state_key): e3.event_id, - # e4 is overwritten by e5 - (e5.type, e5.state_key): e5.event_id, }, ) @@ -269,8 +307,6 @@ class StateStoreTestCase(tests.unittest.TestCase): # list fetched keys so it knows it's partial fetched_keys=( (e1.type, e1.state_key), - (e3.type, e3.state_key), - (e5.type, e5.state_key), ), ) @@ -284,8 +320,6 @@ class StateStoreTestCase(tests.unittest.TestCase): set( [ (e1.type, e1.state_key), - (e3.type, e3.state_key), - (e5.type, e5.state_key), ] ), ) @@ -293,8 +327,6 @@ class StateStoreTestCase(tests.unittest.TestCase): state_dict_ids, { (e1.type, e1.state_key): e1.event_id, - (e3.type, e3.state_key): e3.event_id, - (e5.type, e5.state_key): e5.event_id, }, ) @@ -304,14 +336,25 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters out members with types=[] room_id = self.room.to_string() (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [], filtered_types=[EventTypes.Member] ) self.assertEqual(is_all, False) self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + room_id = self.room.to_string() + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({}, state_dict) + # test _get_some_state_from_cache correctly filters in members wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -319,8 +362,19 @@ class StateStoreTestCase(tests.unittest.TestCase): self.assertDictEqual( { (e1.type, e1.state_key): e1.event_id, + }, + state_dict, + ) + + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual( + { (e3.type, e3.state_key): e3.event_id, - # e4 is overwritten by e5 (e5.type, e5.state_key): e5.event_id, }, state_dict, @@ -328,6 +382,7 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member], @@ -337,6 +392,20 @@ class StateStoreTestCase(tests.unittest.TestCase): self.assertDictEqual( { (e1.type, e1.state_key): e1.event_id, + }, + state_dict, + ) + + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_members_cache, + group, + [(EventTypes.Member, e5.state_key)], + filtered_types=[EventTypes.Member], + ) + + self.assertEqual(is_all, True) + self.assertDictEqual( + { (e5.type, e5.state_key): e5.event_id, }, state_dict, @@ -345,8 +414,22 @@ class StateStoreTestCase(tests.unittest.TestCase): # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( + self.store._state_group_cache, + group, [(EventTypes.Member, e5.state_key)], filtered_types=None + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({}, state_dict) + + (state_dict, is_all) = yield 
self.store._get_some_state_from_cache( + self.store._state_group_members_cache, group, [(EventTypes.Member, e5.state_key)], filtered_types=None ) self.assertEqual(is_all, True) - self.assertDictEqual({(e5.type, e5.state_key): e5.event_id}, state_dict) + self.assertDictEqual( + { + (e5.type, e5.state_key): e5.event_id, + }, + state_dict, + ) -- cgit 1.5.1 From eb7be75a108e002d547dc0f0566386ddc0239a5f Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 21 Aug 2018 23:38:06 -0600 Subject: Create 3735.misc --- changelog.d/3735.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3735.misc diff --git a/changelog.d/3735.misc b/changelog.d/3735.misc new file mode 100644 index 0000000000..f17004be71 --- /dev/null +++ b/changelog.d/3735.misc @@ -0,0 +1 @@ +Fix minor spelling error in federation client documentation. -- cgit 1.5.1
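For reference, the request_metrics.py rename at the top of this series sits in a small concurrency pattern that the surrounding context lines describe: the _in_flight_requests set is shared between request handlers and the Prometheus collector thread, so every access goes through a threading.Lock, and the collector snapshots the set into a list before iterating so it cannot change mid-collection. A minimal self-contained sketch of that pattern (the function names and the rm.name attribute are illustrative, not Synapse's API):

    import threading

    _in_flight_requests = set()
    # Protects the _in_flight_requests set from concurrent access
    _in_flight_requests_lock = threading.Lock()

    def start_tracking(rm):
        with _in_flight_requests_lock:
            _in_flight_requests.add(rm)

    def stop_tracking(rm):
        with _in_flight_requests_lock:
            # discard() (unlike remove()) is a no-op if rm was never added
            _in_flight_requests.discard(rm)

    def get_in_flight_counts():
        # Snapshot to a list under the lock so the set cannot change while
        # the metrics thread iterates over it
        with _in_flight_requests_lock:
            reqs = list(_in_flight_requests)
        counts = {}
        for rm in reqs:
            counts[rm.name] = counts.get(rm.name, 0) + 1
        return counts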
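The two consent_resource.py fixes are both bytes-versus-unicode problems at the HMAC boundary: the h query parameter is now kept as bytes (encoding=None), and the unicode userid is encoded before being fed to hmac.new(), which requires bytes for both key and message. A standalone sketch of such a check, assuming a hex-encoded MAC; the function name and the use of hmac.compare_digest are this sketch's choices, since the patch does not show the comparison itself:

    import hmac
    from hashlib import sha256

    def check_hash(userid, userhmac, hmac_secret):
        # userid: unicode; userhmac: bytes (hex digest); hmac_secret: bytes.
        # hmac.new() needs bytes for both key and msg: passing a unicode
        # msg raises TypeError on Python 3, which is the kind of failure
        # that surfaces from this resource as a 500.
        want_mac = hmac.new(
            key=hmac_secret,
            msg=userid.encode('utf-8'),
            digestmod=sha256,
        ).hexdigest()
        # compare_digest wants both sides to be the same type, hence
        # encoding the computed hex digest to bytes before comparing
        return hmac.compare_digest(want_mac.encode('ascii'), userhmac)

Keeping userhmac as bytes end to end, rather than decoding and re-encoding it, is what the encoding=None change to parse_string achieves; the added docstring ("userhmac (bytes)") records that contract.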
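Several hunks in the state-store patch unpack cache entries as (is_all, known_absent, state_dict_ids), and the updated tests pass fetched_keys to cache.update() to mark an entry as partial. A toy stand-in for that contract may help when reading those hunks; this is an assumed simplification, not the real DictionaryCache (in particular, the real cache also takes a sequence number on update, used to guard against racing invalidations, which is omitted here):

    from collections import namedtuple

    # full: the value is the group's complete state dict.
    # known_absent: the keys whose status the cache has confirmed - per the
    # test assertions above, for a partial entry this is the set of keys
    # that were actually fetched.
    DictionaryEntry = namedtuple("DictionaryEntry", ("full", "known_absent", "value"))

    class ToyDictionaryCache(object):
        def __init__(self):
            self._entries = {}

        def get(self, key):
            # A miss means we know nothing: not full, no keys confirmed
            return self._entries.get(key, DictionaryEntry(False, frozenset(), {}))

        def update(self, key, value, fetched_keys=None):
            # fetched_keys=None asserts that `value` is complete; otherwise
            # it lists exactly which keys were looked up, letting a later
            # read distinguish "absent from the state" from "not cached yet"
            if fetched_keys is None:
                self._entries[key] = DictionaryEntry(True, frozenset(), dict(value))
            else:
                self._entries[key] = DictionaryEntry(
                    False, frozenset(fetched_keys), dict(value)
                )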
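Finally, the standalone sketch promised in bb81e78e's commit message: the partition/query/merge shape of the split-cache lookup. Plain dicts stand in for the two DictionaryCaches, the database fall-back for cache misses is omitted, and all names are illustrative rather than Synapse's actual API:

    MEMBER = "m.room.member"

    def get_state_for_groups(groups, non_member_cache, member_cache, types=None):
        # Partition the requested (type, state_key) pairs between the two
        # caches; types=None means "all state of all types"
        if types is not None:
            non_member_types = [t for t in types if t[0] != MEMBER]
            member_types = [t for t in types if t[0] == MEMBER]
        else:
            non_member_types = member_types = None

        non_member_state = _get_from_cache(non_member_cache, groups, non_member_types)
        member_state = _get_from_cache(member_cache, groups, member_types)

        # The caches hold disjoint slices of each group's state, so merging
        # is a straight per-group dict update
        state = non_member_state
        for group in groups:
            state[group].update(member_state[group])
        return state

    def _get_from_cache(cache, groups, types):
        # A real DictionaryCache can report a partial hit and fall back to
        # the database for the rest; this sketch assumes fully warm caches
        out = {}
        for group in groups:
            entry = cache[group]  # dict[(type, state_key)] -> event_id
            if types is None:
                out[group] = dict(entry)
            else:
                wanted = set(types)
                out[group] = {
                    k: v for k, v in entry.items()
                    # a state_key of None acts as a per-type wildcard
                    if k in wanted or (k[0], None) in wanted
                }
        return out

For the lazy-loading case this gives, for example:

    non_members = {42: {("m.room.create", ""): "$create", ("m.room.power_levels", ""): "$pl"}}
    members = {42: {(MEMBER, "@alice:hs"): "$a", (MEMBER, "@bob:hs"): "$b"}}
    state = get_state_for_groups(
        [42], non_members, members,
        types=[(MEMBER, "@alice:hs"), ("m.room.create", None), ("m.room.power_levels", None)],
    )
    # -> {42: {("m.room.create", ""): "$create",
    #          ("m.room.power_levels", ""): "$pl",
    #          (MEMBER, "@alice:hs"): "$a"}}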