diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 51ec727df0..ef20c2296c 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015 - 2016 OpenMarket Ltd
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,24 +14,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.api.constants import Membership, EventTypes
-from synapse.util.async import concurrently_execute
+import collections
+import itertools
+import logging
+
+from six import iteritems, itervalues
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.push.clientformat import format_push_rules_for_user
+from synapse.types import RoomStreamToken
+from synapse.util.async_helpers import concurrently_execute
+from synapse.util.caches.expiringcache import ExpiringCache
+from synapse.util.caches.lrucache import LruCache
+from synapse.util.caches.response_cache import ResponseCache
from synapse.util.logcontext import LoggingContext
from synapse.util.metrics import Measure, measure_func
-from synapse.util.caches.response_cache import ResponseCache
-from synapse.push.clientformat import format_push_rules_for_user
from synapse.visibility import filter_events_for_client
-from synapse.types import RoomStreamToken
-from twisted.internet import defer
-
-import collections
-import logging
-import itertools
+logger = logging.getLogger(__name__)
-from six import itervalues, iteritems
+# How long we retain entries in the cache that tracks which lazy-loaded
+# members have been sent to a given client (entries expire after 30 minutes).
+LAZY_LOADED_MEMBERS_CACHE_MAX_AGE = 30 * 60 * 1000
-logger = logging.getLogger(__name__)
+# Remember the last 100 members we sent to a client, so that we can avoid
+# redundantly re-sending the same lazy-loaded members.
+LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100
SyncConfig = collections.namedtuple("SyncConfig", [
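Taken together, the two constants above configure a two-level structure: an expiring outer cache keyed by `(user_id, device_id)`, whose entries are small per-client LRUs of `state_key -> event_id`. Below is a minimal standalone sketch of that shape in plain Python 3; `TinyLru` and `get_client_cache` are illustrative stand-ins for synapse's `LruCache` and `ExpiringCache`, which handle eviction and metrics far more carefully.

```python
import time
from collections import OrderedDict

MAX_AGE_MS = 30 * 60 * 1000   # mirrors LAZY_LOADED_MEMBERS_CACHE_MAX_AGE
MAX_SIZE = 100                # mirrors LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE

class TinyLru(OrderedDict):
    """A dict that keeps at most MAX_SIZE entries, evicting the least recently used."""
    def get(self, key, default=None):
        if key in self:
            self.move_to_end(key)              # mark as recently used
            return self[key]
        return default

    def set(self, key, value):
        self[key] = value
        self.move_to_end(key)
        if len(self) > MAX_SIZE:
            self.popitem(last=False)           # drop least recently used

# (user_id, device_id) -> (created_ms, TinyLru of state_key -> event_id)
_caches = {}

def get_client_cache(cache_key):
    now_ms = time.time() * 1000
    entry = _caches.get(cache_key)
    if entry is None or now_ms - entry[0] > MAX_AGE_MS:
        entry = (now_ms, TinyLru())            # expired or missing: start fresh
        _caches[cache_key] = entry
    return entry[1]
```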
@@ -64,6 +75,7 @@ class JoinedSyncResult(collections.namedtuple("JoinedSyncResult", [
"ephemeral",
"account_data",
"unread_notifications",
+ "summary",
])):
__slots__ = []
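For reference, the new `summary` field carries the block built by `compute_summary` (added later in this diff), along these lines (values illustrative):

```python
summary = {
    "m.heroes": ["@alice:example.org", "@bob:example.org"],
    "m.joined_member_count": 2,
    "m.invited_member_count": 1,
}
```

Note that `m.heroes` is omitted when the room has a name or canonical alias, as the early return in `compute_summary` below shows.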
@@ -145,7 +157,7 @@ class SyncResult(collections.namedtuple("SyncResult", [
"invited", # InvitedSyncResult for each invited room.
"archived", # ArchivedSyncResult for each archived room.
"to_device", # List of direct messages for the device.
- "device_lists", # List of user_ids whose devices have chanegd
+ "device_lists", # List of user_ids whose devices have changed
"device_one_time_keys_count", # Dict of algorithm to count for one time keys
# for this device
"groups",
@@ -173,6 +185,7 @@ class SyncResult(collections.namedtuple("SyncResult", [
class SyncHandler(object):
def __init__(self, hs):
+ self.hs_config = hs.config
self.store = hs.get_datastore()
self.notifier = hs.get_notifier()
self.presence_handler = hs.get_presence_handler()
@@ -180,20 +193,35 @@ class SyncHandler(object):
self.clock = hs.get_clock()
self.response_cache = ResponseCache(hs, "sync")
self.state = hs.get_state_handler()
+ self.auth = hs.get_auth()
+
+ # ExpiringCache((User, Device)) -> LruCache(state_key => event_id)
+ self.lazy_loaded_members_cache = ExpiringCache(
+ "lazy_loaded_members_cache", self.clock,
+ max_len=0, expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE,
+ )
+ @defer.inlineCallbacks
def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0,
full_state=False):
"""Get the sync for a client if we have new data for it now. Otherwise
wait for new data to arrive on the server. If the timeout expires, then
return an empty sync result.
Returns:
- A Deferred SyncResult.
+ Deferred[SyncResult]
"""
- return self.response_cache.wrap(
+        # If the user is not part of the mau group, then check that limits have
+        # not been exceeded (if they are not part of the group by this point,
+        # auth blocking will almost certainly occur)
+ user_id = sync_config.user.to_string()
+ yield self.auth.check_auth_blocking(user_id)
+
+ res = yield self.response_cache.wrap(
sync_config.request_key,
self._wait_for_sync_for_user,
sync_config, since_token, timeout, full_state,
)
+ defer.returnValue(res)
@defer.inlineCallbacks
def _wait_for_sync_for_user(self, sync_config, since_token, timeout,
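Note the ordering here: `check_auth_blocking` runs before the response cache is consulted, so a blocked user can never populate (or be served from) a cached response. The `wrap` call itself de-duplicates concurrent requests that share a `request_key`. A rough sketch of that semantic, assuming Twisted and ignoring the result-caching and timeout behaviour of the real `ResponseCache`; `TinyResponseCache` is an illustrative name:

```python
from twisted.internet import defer
from twisted.python import failure

class TinyResponseCache(object):
    """Shares one in-flight computation between callers with the same key."""

    def __init__(self):
        self._pending = {}   # request_key -> list of Deferreds awaiting the result

    def wrap(self, key, callback, *args, **kwargs):
        if key in self._pending:
            # someone is already computing this response: piggyback on it
            d = defer.Deferred()
            self._pending[key].append(d)
            return d

        self._pending[key] = []
        result = defer.maybeDeferred(callback, *args, **kwargs)

        def _broadcast(res):
            # fan the result (success or failure) out to every waiter
            for waiter in self._pending.pop(key, []):
                if isinstance(res, failure.Failure):
                    waiter.errback(res)
                else:
                    waiter.callback(res)
            return res

        result.addBoth(_broadcast)
        return result
```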
@@ -416,29 +444,44 @@ class SyncHandler(object):
))
@defer.inlineCallbacks
- def get_state_after_event(self, event):
+ def get_state_after_event(self, event, types=None, filtered_types=None):
"""
Get the room state after the given event
Args:
event(synapse.events.EventBase): event of interest
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type. If `types` itself is
+                None, no filtering is applied at all.
+ filtered_types(list[str]|None): Only apply filtering via `types` to this
+ list of event types. Other types of events are returned unfiltered.
+ If None, `types` filtering is applied to all events.
Returns:
            A Deferred map from (type, state_key) to event_id
"""
- state_ids = yield self.store.get_state_ids_for_event(event.event_id)
+ state_ids = yield self.store.get_state_ids_for_event(
+ event.event_id, types, filtered_types=filtered_types,
+ )
if event.is_state():
state_ids = state_ids.copy()
state_ids[(event.type, event.state_key)] = event.event_id
defer.returnValue(state_ids)
@defer.inlineCallbacks
- def get_state_at(self, room_id, stream_position):
+ def get_state_at(self, room_id, stream_position, types=None, filtered_types=None):
""" Get the room state at a particular stream position
Args:
room_id(str): room for which to get state
stream_position(StreamToken): point at which to get state
+ types(list[(str, str|None)]|None): List of (type, state_key) tuples
+ which are used to filter the state fetched. If `state_key` is None,
+ all events are returned of the given type.
+ filtered_types(list[str]|None): Only apply filtering via `types` to this
+ list of event types. Other types of events are returned unfiltered.
+ If None, `types` filtering is applied to all events.
Returns:
            A Deferred map from (type, state_key) to event_id
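The `types`/`filtered_types` contract documented above is easiest to see in miniature. A pure-Python sketch of the filtering rule, separate from synapse's storage layer (`filter_state_ids` is a hypothetical helper, not part of this diff):

```python
def filter_state_ids(state_ids, types=None, filtered_types=None):
    """Filter a (type, state_key) -> event_id map per the docstrings above."""
    if types is None:
        return dict(state_ids)                  # no filtering at all
    wanted = set(types)
    wanted_any_key = set(t for t, k in types if k is None)
    return {
        (typ, key): event_id
        for (typ, key), event_id in state_ids.items()
        if (
            # `types` only constrains event types listed in `filtered_types`
            (filtered_types is not None and typ not in filtered_types)
            or typ in wanted_any_key
            or (typ, key) in wanted
        )
    }

state = {
    ("m.room.member", "@alice:example.org"): "$a",
    ("m.room.member", "@bob:example.org"): "$b",
    ("m.room.name", ""): "$n",
}
# Keep only @alice's membership, but let all non-member events through:
print(filter_state_ids(
    state,
    types=[("m.room.member", "@alice:example.org")],
    filtered_types=["m.room.member"],
))
# -> {('m.room.member', '@alice:example.org'): '$a', ('m.room.name', ''): '$n'}
```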
@@ -453,7 +496,9 @@ class SyncHandler(object):
if last_events:
last_event = last_events[-1]
- state = yield self.get_state_after_event(last_event)
+ state = yield self.get_state_after_event(
+ last_event, types, filtered_types=filtered_types,
+ )
else:
# no events in this room - so presumably no state
@@ -461,9 +506,141 @@ class SyncHandler(object):
defer.returnValue(state)
@defer.inlineCallbacks
+ def compute_summary(self, room_id, sync_config, batch, state, now_token):
+ """ Works out a room summary block for this room, summarising the number
+ of joined members in the room, and providing the 'hero' members if the
+ room has no name so clients can consistently name rooms. Also adds
+ state events to 'state' if needed to describe the heroes.
+
+ Args:
+ room_id(str):
+ sync_config(synapse.handlers.sync.SyncConfig):
+ batch(synapse.handlers.sync.TimelineBatch): The timeline batch for
+ the room that will be sent to the user.
+ state(dict): dict of (type, state_key) -> Event as returned by
+ compute_state_delta
+            now_token(StreamToken): Token of the end of the current batch.
+
+ Returns:
+ A deferred dict describing the room summary
+ """
+
+        # FIXME: this perpetuates https://github.com/matrix-org/synapse/issues/3305
+ last_events, _ = yield self.store.get_recent_event_ids_for_room(
+ room_id, end_token=now_token.room_key, limit=1,
+ )
+
+ if not last_events:
+ defer.returnValue(None)
+ return
+
+ last_event = last_events[-1]
+ state_ids = yield self.store.get_state_ids_for_event(
+ last_event.event_id, [
+ (EventTypes.Member, None),
+ (EventTypes.Name, ''),
+ (EventTypes.CanonicalAlias, ''),
+ ]
+ )
+
+ member_ids = {
+ state_key: event_id
+            for (t, state_key), event_id in iteritems(state_ids)
+ if t == EventTypes.Member
+ }
+ name_id = state_ids.get((EventTypes.Name, ''))
+ canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, ''))
+
+ summary = {}
+
+ # FIXME: it feels very heavy to load up every single membership event
+ # just to calculate the counts.
+ member_events = yield self.store.get_events(member_ids.values())
+
+ joined_user_ids = []
+ invited_user_ids = []
+
+ for ev in member_events.values():
+ if ev.content.get("membership") == Membership.JOIN:
+ joined_user_ids.append(ev.state_key)
+ elif ev.content.get("membership") == Membership.INVITE:
+ invited_user_ids.append(ev.state_key)
+
+ # TODO: only send these when they change.
+ summary["m.joined_member_count"] = len(joined_user_ids)
+ summary["m.invited_member_count"] = len(invited_user_ids)
+
+ if name_id or canonical_alias_id:
+ defer.returnValue(summary)
+
+ # FIXME: order by stream ordering, not alphabetic
+
+ me = sync_config.user.to_string()
+        if joined_user_ids or invited_user_ids:
+ summary['m.heroes'] = sorted(
+ [
+ user_id
+ for user_id in (joined_user_ids + invited_user_ids)
+ if user_id != me
+ ]
+ )[0:5]
+ else:
+ summary['m.heroes'] = sorted(
+ [user_id for user_id in member_ids.keys() if user_id != me]
+ )[0:5]
+
+ if not sync_config.filter_collection.lazy_load_members():
+ defer.returnValue(summary)
+
+ # ensure we send membership events for heroes if needed
+ cache_key = (sync_config.user.to_string(), sync_config.device_id)
+ cache = self.get_lazy_loaded_members_cache(cache_key)
+
+ # track which members the client should already know about via LL:
+ # Ones which are already in state...
+ existing_members = set(
+ user_id for (typ, user_id) in state.keys()
+ if typ == EventTypes.Member
+ )
+
+ # ...or ones which are in the timeline...
+ for ev in batch.events:
+ if ev.type == EventTypes.Member:
+ existing_members.add(ev.state_key)
+
+ # ...and then ensure any missing ones get included in state.
+ missing_hero_event_ids = [
+ member_ids[hero_id]
+ for hero_id in summary['m.heroes']
+ if (
+ cache.get(hero_id) != member_ids[hero_id] and
+ hero_id not in existing_members
+ )
+ ]
+
+ missing_hero_state = yield self.store.get_events(missing_hero_event_ids)
+ missing_hero_state = missing_hero_state.values()
+
+ for s in missing_hero_state:
+ cache.set(s.state_key, s.event_id)
+ state[(EventTypes.Member, s.state_key)] = s
+
+ defer.returnValue(summary)
+
+ def get_lazy_loaded_members_cache(self, cache_key):
+ cache = self.lazy_loaded_members_cache.get(cache_key)
+ if cache is None:
+ logger.debug("creating LruCache for %r", cache_key)
+ cache = LruCache(LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE)
+ self.lazy_loaded_members_cache[cache_key] = cache
+ else:
+ logger.debug("found LruCache for %r", cache_key)
+ return cache
+
+ @defer.inlineCallbacks
def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token,
full_state):
- """ Works out the differnce in state between the start of the timeline
+ """ Works out the difference in state between the start of the timeline
and the previous sync.
Args:
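The hero-selection rule in `compute_summary` above reduces to a few lines: prefer joined and invited members, fall back to everyone we know about (e.g. a room everyone has left), always exclude the requesting user, and take the first five in (for now, per the FIXME) alphabetical order. A distilled sketch, with `pick_heroes` as an illustrative name:

```python
def pick_heroes(me, joined_user_ids, invited_user_ids, all_member_ids):
    candidates = [u for u in joined_user_ids + invited_user_ids if u != me]
    if not candidates:
        # nobody joined or invited (e.g. everyone left): fall back to any member
        candidates = [u for u in all_member_ids if u != me]
    return sorted(candidates)[0:5]

print(pick_heroes(
    me="@me:example.org",
    joined_user_ids=["@carol:example.org", "@alice:example.org", "@me:example.org"],
    invited_user_ids=["@bob:example.org"],
    all_member_ids=[],
))
# -> ['@alice:example.org', '@bob:example.org', '@carol:example.org']
```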
@@ -477,7 +654,7 @@ class SyncHandler(object):
full_state(bool): Whether to force returning the full state.
Returns:
- A deferred new event dictionary
+ A deferred dict of (type, state_key) -> Event
"""
# TODO(mjark) Check if the state events were received by the server
# after the previous sync, since we need to include those state
@@ -485,59 +662,130 @@ class SyncHandler(object):
# TODO(mjark) Check for new redactions in the state events.
with Measure(self.clock, "compute_state_delta"):
+
+ types = None
+ filtered_types = None
+
+ lazy_load_members = sync_config.filter_collection.lazy_load_members()
+ include_redundant_members = (
+ sync_config.filter_collection.include_redundant_members()
+ )
+
+ if lazy_load_members:
+ # We only request state for the members needed to display the
+ # timeline:
+
+ types = [
+ (EventTypes.Member, state_key)
+ for state_key in set(
+ event.sender # FIXME: we also care about invite targets etc.
+ for event in batch.events
+ )
+ ]
+
+ # only apply the filtering to room members
+ filtered_types = [EventTypes.Member]
+
+ timeline_state = {
+ (event.type, event.state_key): event.event_id
+ for event in batch.events if event.is_state()
+ }
+
if full_state:
if batch:
current_state_ids = yield self.store.get_state_ids_for_event(
- batch.events[-1].event_id
+ batch.events[-1].event_id, types=types,
+ filtered_types=filtered_types,
)
state_ids = yield self.store.get_state_ids_for_event(
- batch.events[0].event_id
+ batch.events[0].event_id, types=types,
+ filtered_types=filtered_types,
)
+
else:
current_state_ids = yield self.get_state_at(
- room_id, stream_position=now_token
+ room_id, stream_position=now_token, types=types,
+ filtered_types=filtered_types,
)
state_ids = current_state_ids
- timeline_state = {
- (event.type, event.state_key): event.event_id
- for event in batch.events if event.is_state()
- }
-
state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_ids,
previous={},
current=current_state_ids,
+ lazy_load_members=lazy_load_members,
)
elif batch.limited:
state_at_previous_sync = yield self.get_state_at(
- room_id, stream_position=since_token
+ room_id, stream_position=since_token, types=types,
+ filtered_types=filtered_types,
)
current_state_ids = yield self.store.get_state_ids_for_event(
- batch.events[-1].event_id
+ batch.events[-1].event_id, types=types,
+ filtered_types=filtered_types,
)
state_at_timeline_start = yield self.store.get_state_ids_for_event(
- batch.events[0].event_id
+ batch.events[0].event_id, types=types,
+ filtered_types=filtered_types,
)
- timeline_state = {
- (event.type, event.state_key): event.event_id
- for event in batch.events if event.is_state()
- }
-
state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_at_timeline_start,
previous=state_at_previous_sync,
current=current_state_ids,
+ lazy_load_members=lazy_load_members,
)
else:
state_ids = {}
+ if lazy_load_members:
+ if types:
+ # We're returning an incremental sync, with no "gap" since
+                    # the previous sync, so normally there would be no state to return.
+ # But we're lazy-loading, so the client might need some more
+ # member events to understand the events in this timeline.
+ # So we fish out all the member events corresponding to the
+ # timeline here, and then dedupe any redundant ones below.
+
+ state_ids = yield self.store.get_state_ids_for_event(
+ batch.events[0].event_id, types=types,
+ filtered_types=None, # we only want members!
+ )
+
+ if lazy_load_members and not include_redundant_members:
+ cache_key = (sync_config.user.to_string(), sync_config.device_id)
+ cache = self.get_lazy_loaded_members_cache(cache_key)
+
+ # if it's a new sync sequence, then assume the client has had
+ # amnesia and doesn't want any recent lazy-loaded members
+ # de-duplicated.
+ if since_token is None:
+ logger.debug("clearing LruCache for %r", cache_key)
+ cache.clear()
+ else:
+ # only send members which aren't in our LruCache (either
+ # because they're new to this client or have been pushed out
+ # of the cache)
+ logger.debug("filtering state from %r...", state_ids)
+ state_ids = {
+ t: event_id
+                    for t, event_id in iteritems(state_ids)
+ if cache.get(t[1]) != event_id
+ }
+ logger.debug("...to %r", state_ids)
+
+ # add any member IDs we are about to send into our LruCache
+ for t, event_id in itertools.chain(
+ state_ids.items(),
+ timeline_state.items(),
+ ):
+ if t[0] == EventTypes.Member:
+ cache.set(t[1], event_id)
state = {}
if state_ids:
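The de-duplication step above boils down to: on an initial sync, forget everything (the client may have wiped its state); on an incremental sync, drop any member event the per-device LRU says was already sent, then record whatever is about to go out, whether it sits in the state block or the timeline. A standalone sketch with a plain dict standing in for the `LruCache` (`dedupe_lazy_members` is an illustrative name):

```python
def dedupe_lazy_members(state_ids, timeline_state, cache, is_initial_sync):
    if is_initial_sync:
        cache.clear()                      # client "amnesia": resend everything
    else:
        state_ids = {
            (typ, key): event_id
            for (typ, key), event_id in state_ids.items()
            if cache.get(key) != event_id  # only send what the client lacks
        }
    # remember the member events we are about to send, from both sources
    for (typ, key), event_id in list(state_ids.items()) + list(timeline_state.items()):
        if typ == "m.room.member":
            cache[key] = event_id
    return state_ids

cache = {}
member = {("m.room.member", "@a:x"): "$m1"}
assert dedupe_lazy_members(member, {}, cache, is_initial_sync=True) == member
assert dedupe_lazy_members(member, {}, cache, is_initial_sync=False) == {}
```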
@@ -620,7 +868,7 @@ class SyncHandler(object):
since_token is None and
sync_config.filter_collection.blocks_all_presence()
)
- if not block_all_presence_data:
+ if self.hs_config.use_presence and not block_all_presence_data:
yield self._generate_sync_entry_for_presence(
sync_result_builder, newly_joined_rooms, newly_joined_users
)
@@ -1312,7 +1560,6 @@ class SyncHandler(object):
if events == [] and tags is None:
return
- since_token = sync_result_builder.since_token
now_token = sync_result_builder.now_token
sync_config = sync_result_builder.sync_config
@@ -1355,6 +1602,18 @@ class SyncHandler(object):
full_state=full_state
)
+ summary = {}
+ if (
+ sync_config.filter_collection.lazy_load_members() and
+ (
+ any(ev.type == EventTypes.Member for ev in batch.events) or
+ since_token is None
+ )
+ ):
+ summary = yield self.compute_summary(
+ room_id, sync_config, batch, state, now_token
+ )
+
if room_builder.rtype == "joined":
unread_notifications = {}
room_sync = JoinedSyncResult(
@@ -1364,6 +1623,7 @@ class SyncHandler(object):
ephemeral=ephemeral,
account_data=account_data_events,
unread_notifications=unread_notifications,
+ summary=summary,
)
if room_sync or always_include:
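The trigger condition a few lines above is worth restating: under lazy-loading, a summary is (re)computed only on an initial sync or when the batch actually contains membership events, since (as far as I can tell) those are the only cases where the counts or heroes can have changed from the client's point of view. As a standalone predicate (`needs_summary` is an illustrative name):

```python
def needs_summary(lazy_load_members, batch_event_types, since_token):
    return lazy_load_members and (
        "m.room.member" in batch_event_types or since_token is None
    )

assert needs_summary(True, ["m.room.message"], since_token=None)       # initial sync
assert needs_summary(True, ["m.room.member"], since_token="s42")       # membership change
assert not needs_summary(True, ["m.room.message"], since_token="s42")  # nothing relevant
assert not needs_summary(False, ["m.room.member"], since_token=None)   # not lazy-loading
```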
@@ -1448,7 +1708,9 @@ def _action_has_highlight(actions):
return False
-def _calculate_state(timeline_contains, timeline_start, previous, current):
+def _calculate_state(
+ timeline_contains, timeline_start, previous, current, lazy_load_members,
+):
"""Works out what state to include in a sync response.
Args:
@@ -1457,6 +1719,9 @@ def _calculate_state(timeline_contains, timeline_start, previous, current):
previous (dict): state at the end of the previous sync (or empty dict
if this is an initial sync)
current (dict): state at the end of the timeline
+        lazy_load_members (bool): whether to return members from timeline_start
+            or not. Assumes that timeline_start has already been filtered to
+            include only the members the client needs to know about.
Returns:
dict
@@ -1472,9 +1737,25 @@ def _calculate_state(timeline_contains, timeline_start, previous, current):
}
c_ids = set(e for e in current.values())
- tc_ids = set(e for e in timeline_contains.values())
- p_ids = set(e for e in previous.values())
ts_ids = set(e for e in timeline_start.values())
+ p_ids = set(e for e in previous.values())
+ tc_ids = set(e for e in timeline_contains.values())
+
+    # If we are lazy-loading room members, we explicitly add the membership events
+ # for the senders in the timeline into the state block returned by /sync,
+ # as we may not have sent them to the client before. We find these membership
+ # events by filtering them out of timeline_start, which has already been filtered
+ # to only include membership events for the senders in the timeline.
+ # In practice, we can do this by removing them from the p_ids list,
+ # which is the list of relevant state we know we have already sent to the client.
+ # see https://github.com/matrix-org/synapse/pull/2970
+ # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809
+
+ if lazy_load_members:
+ p_ids.difference_update(
+            e for t, e in iteritems(timeline_start)
+ if t[0] == EventTypes.Member
+ )
state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
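A worked toy example of the set arithmetic above: the state to send is (current ∪ timeline_start) minus what the client already has (previous), minus what the timeline batch itself contains, and the lazy-loading adjustment forces a member event back into the response even though it was sent in an earlier sync. Event IDs here are made up:

```python
c_ids = {"$power_levels", "$alice_member"}   # current state
ts_ids = {"$alice_member"}                   # state at timeline start
p_ids = {"$alice_member"}                    # state at the previous sync
tc_ids = {"$power_levels"}                   # state already in the timeline batch

# Without lazy-loading: nothing to send, the client has seen it all.
print((c_ids | ts_ids) - p_ids - tc_ids)     # -> set()

# With lazy-loading, @alice's membership was deliberately pulled into
# timeline_start, so it is removed from p_ids to force re-inclusion:
p_ids = p_ids - {"$alice_member"}
print((c_ids | ts_ids) - p_ids - tc_ids)     # -> {'$alice_member'}
```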