From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson
Date: Sun, 11 Mar 2018 20:01:41 +0000
Subject: WIP experiment in lazyloading room members

---
 synapse/handlers/sync.py | 43 +++++++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 12 deletions(-)

(limited to 'synapse/handlers')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0f713ce038..809e9fece9 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -399,7 +399,7 @@ class SyncHandler(object):
         ))

     @defer.inlineCallbacks
-    def get_state_after_event(self, event):
+    def get_state_after_event(self, event, types=None):
         """
         Get the room state after the given event

@@ -409,14 +409,14 @@
         Returns:
             A Deferred map from ((type, state_key)->Event)
         """
-        state_ids = yield self.store.get_state_ids_for_event(event.event_id)
+        state_ids = yield self.store.get_state_ids_for_event(event.event_id, types)
         if event.is_state():
             state_ids = state_ids.copy()
             state_ids[(event.type, event.state_key)] = event.event_id
         defer.returnValue(state_ids)

     @defer.inlineCallbacks
-    def get_state_at(self, room_id, stream_position):
+    def get_state_at(self, room_id, stream_position, types=None):
         """ Get the room state at a particular stream position

         Args:
@@ -432,7 +432,7 @@

         if last_events:
             last_event = last_events[-1]
-            state = yield self.get_state_after_event(last_event)
+            state = yield self.get_state_after_event(last_event, types)

         else:
             # no events in this room - so presumably no state
@@ -441,7 +441,7 @@

     @defer.inlineCallbacks
     def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token,
-                            full_state):
+                            full_state, filter_members):
         """ Works out the difference in state between the start of the timeline
         and the previous sync.

@@ -454,6 +454,8 @@
                 be None.
             now_token(str): Token of the end of the current batch.
             full_state(bool): Whether to force returning the full state.
+            filter_members(bool): Whether to only return state for members
+                referenced in this timeline segment

         Returns:
             A deferred new event dictionary
@@ -464,18 +466,35 @@
         # TODO(mjark) Check for new redactions in the state events.

         with Measure(self.clock, "compute_state_delta"):
+
+            types = None
+            if filter_members:
+                # We only request state for the members needed to display the
+                # timeline:
+                types = (
+                    (EventTypes.Member, state_key)
+                    for state_key in set(
+                        event.sender  # FIXME: we also care about targets etc.
+ for event in batch.events + ) + ) + types.append((None, None)) # don't just filter to room members + + # TODO: we should opportunistically deduplicate these members too + # within the same sync series (based on an in-memory cache) + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token + room_id, stream_position=now_token, types=types ) state_ids = current_state_ids @@ -493,15 +512,15 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token + room_id, stream_position=since_token, types=types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) timeline_state = { @@ -1325,7 +1344,7 @@ class SyncHandler(object): state = yield self.compute_state_delta( room_id, batch, sync_config, since_token, now_token, - full_state=full_state + full_state=full_state, filter_members=True ) if room_builder.rtype == "joined": -- cgit 1.4.1 From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:10:25 +0000 Subject: typos --- synapse/handlers/sync.py | 4 ++-- synapse/storage/state.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 809e9fece9..fa730ca760 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -471,13 +471,13 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: - types = ( + types = [ (EventTypes.Member, state_key) for state_key in set( event.sender # FIXME: we also care about targets etc. 
for event in batch.events ) - ) + ] types.append((None, None)) # don't just filter to room members # TODO: we should opportunistically deduplicate these members too diff --git a/synapse/storage/state.py b/synapse/storage/state.py index da6bb685fa..0238200286 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore): args = [next_group] if types: args.extend(i for typ in types for i in typ) + if include_other_types: + args.extend(typ for (typ, _) in types) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" -- cgit 1.4.1 From 14a9d2f73d50225f190f42e270cbf9ef7447bd8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:03:42 +0000 Subject: ensure we always include the members for a given timeline block --- synapse/handlers/sync.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index fa730ca760..c754cfdeeb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -468,6 +468,8 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None + member_state_ids = {} + if filter_members: # We only request state for the members needed to display the # timeline: @@ -492,6 +494,13 @@ class SyncHandler(object): state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types ) + + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -499,6 +508,12 @@ class SyncHandler(object): state_ids = current_state_ids + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -507,6 +522,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, + timeline_start_members=member_state_ids, previous={}, current=current_state_ids, ) @@ -523,6 +539,12 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) + if filter_members: + member_state_ids = { + t: state_at_timeline_start[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -531,6 +553,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, + timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, ) @@ -1440,12 +1463,16 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, previous, current): +def _calculate_state(timeline_contains, timeline_start, timeline_start_members, + previous, current): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline + timeline_start_members (dict): state at the start of the timeline + for room members who participate in this chunk of timeline. + Should always be a subset of timeline_start. 
previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1464,11 +1491,12 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): } c_ids = set(e for e in current.values()) - tc_ids = set(e for e in timeline_contains.values()) - p_ids = set(e for e in previous.values()) ts_ids = set(e for e in timeline_start.values()) + tsm_ids = set(e for e in timeline_start_members.values()) + p_ids = set(e for e in previous.values()) + tc_ids = set(e for e in timeline_contains.values()) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids return { event_id_to_key[e]: e for e in state_ids -- cgit 1.4.1 From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:31:41 +0000 Subject: make it work --- synapse/handlers/sync.py | 6 +++--- synapse/storage/state.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c754cfdeeb..c05e3d107f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -498,7 +498,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } else: @@ -511,7 +511,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4291cde7ab..9c9994c073 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? + unique_types = set([ t for (t, _) in types ]) clause_to_args.append( ( - "AND type <> ? " * len(types), - [t for (t, _) in types] + "AND type <> ? 
" * len(unique_types), + list(unique_types) ) ) else: @@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: + unique_types = set([ t for (t, _) in types ]) where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(types)) + ")" + "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) - where_args.extend(t for (t, _) in types) + where_args.extend(list(unique_types)) where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: -- cgit 1.4.1 From c9d72e4571752554dfe01d755ae23f55c5f84ade Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 23:46:45 +0000 Subject: oops --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c05e3d107f..887624c431 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_at_timeline_start if t[0] == EventTypes.Member } timeline_state = { -- cgit 1.4.1 From 4d0cfef6ee023bfe83113a0378321830ebde1619 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:02:20 +0000 Subject: add copyright to nudge CI --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 887624c431..edbd2ae771 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 - 2016 OpenMarket Ltd +# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
-- cgit 1.4.1 From 3bc5bd2d22e6b53ec1f89760301df1517e71b53a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 00:52:04 +0000 Subject: make incr syncs work --- synapse/handlers/sync.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index edbd2ae771..84c894ca4a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -499,7 +499,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } else: @@ -512,7 +512,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } timeline_state = { @@ -543,7 +543,8 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member + for t in state_at_timeline_start + if state_at_timeline_start[t][0] == EventTypes.Member } timeline_state = { -- cgit 1.4.1 From 5b3b3aada8952b53f82723227c9758ed47450a2e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:17:34 +0000 Subject: simplify timeline_start_members --- synapse/handlers/sync.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 84c894ca4a..ffb4f7915e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -496,12 +496,6 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } - else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -509,11 +503,13 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } + if filter_members: + logger.info("Finding members from %r", state_ids) + member_state_ids = { + e: state_ids[e] + for e in state_ids if state_ids[e][0] == EventTypes.Member + } + logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,11 +537,14 @@ class SyncHandler(object): ) if filter_members: + logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start - if state_at_timeline_start[t][0] == EventTypes.Member + e: state_at_timeline_start[e] + for e in state_at_timeline_start + if state_at_timeline_start[e][0] == EventTypes.Member } + logger.info("Found members %r", member_state_ids) + timeline_state = { (event.type, event.state_key): event.event_id -- cgit 1.4.1 From f7dcc404f216383bfd62e4611c6a28c3f13576dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:37:53 +0000 Subject: add state_ids for timeline entries --- synapse/handlers/sync.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ffb4f7915e..9b7e598e74 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -474,6 +474,7 @@ 
class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: + types = [ (EventTypes.Member, state_key) for state_key in set( @@ -481,11 +482,14 @@ class SyncHandler(object): for event in batch.events ) ] - types.append((None, None)) # don't just filter to room members - # TODO: we should opportunistically deduplicate these members too + # TODO: we should opportunistically deduplicate these members here # within the same sync series (based on an in-memory cache) + if not types: + filter_members = False + types.append((None, None)) # don't just filter to room members + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -545,7 +549,6 @@ class SyncHandler(object): } logger.info("Found members %r", member_state_ids) - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -559,7 +562,14 @@ class SyncHandler(object): current=current_state_ids, ) else: - state_ids = {} + if filter_members: + # strip off the (None, None) and filter to just room members + types = types[:-1] + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) + else: + state_ids = {} state = {} if state_ids: -- cgit 1.4.1 From 4f0493c850d4611e8ada42c1de54a18e8dc15a37 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:43:37 +0000 Subject: fix tsm search again --- synapse/handlers/sync.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9b7e598e74..4bf85a128f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -510,8 +510,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_ids) member_state_ids = { - e: state_ids[e] - for e in state_ids if state_ids[e][0] == EventTypes.Member + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) @@ -543,9 +543,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - e: state_at_timeline_start[e] - for e in state_at_timeline_start - if state_at_timeline_start[e][0] == EventTypes.Member + t: state_at_timeline_start[t] + for t in state_at_timeline_start if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) -- cgit 1.4.1 From fc5397fdf5acefd33bd3b808b6d8cc7c31b69b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:44:55 +0000 Subject: remove debug --- synapse/handlers/sync.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bf85a128f..b7f42bd594 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -508,12 +508,10 @@ class SyncHandler(object): state_ids = current_state_ids if filter_members: - logger.info("Finding members from %r", state_ids) member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,12 +539,10 @@ class SyncHandler(object): ) if filter_members: - logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } - 
logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id -- cgit 1.4.1 From 366f730bf697fe8fbb18a509ec1852987bc80410 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 18 Mar 2018 21:40:35 +0000 Subject: only get member state IDs for incremental syncs if we're filtering --- synapse/handlers/sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b7f42bd594..6b57afd97b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -557,14 +557,14 @@ class SyncHandler(object): current=current_state_ids, ) else: + state_ids = {} if filter_members: # strip off the (None, None) and filter to just room members types = types[:-1] - state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types - ) - else: - state_ids = {} + if types: + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) state = {} if state_ids: -- cgit 1.4.1 From 478af0f72005708dbbed23e30c547c3d66c07c0e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:00:12 +0000 Subject: reshuffle todo & comments --- synapse/handlers/sync.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6b57afd97b..76f5057377 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -483,11 +483,15 @@ class SyncHandler(object): ) ] - # TODO: we should opportunistically deduplicate these members here - # within the same sync series (based on an in-memory cache) + # We can't remove redundant member types at this stage as it has + # to be done based on event_id, and we don't have the member + # event ids until we've pulled them out of the DB. if not types: + # an optimisation to stop needlessly trying to calculate + # member_state_ids filter_members = False + types.append((None, None)) # don't just filter to room members if full_state: @@ -559,6 +563,10 @@ class SyncHandler(object): else: state_ids = {} if filter_members: + # TODO: filter out redundant members based on their mxids (not their + # event_ids) at this point. We know we can do it based on mxid as this + # is an non-gappy incremental sync. 
+
+            # strip off the (None, None) and filter to just room members
            types = types[:-1]
            if types:
--
cgit 1.4.1


From b2f22829475ccfe19e994aedddb8d04995018bf4 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson
Date: Mon, 19 Mar 2018 01:15:13 +0000
Subject: make lazy_load_members configurable in filters

---
 synapse/api/filtering.py | 6 ++++++
 synapse/handlers/sync.py | 18 +++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'synapse/handlers')

diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 83206348e5..339e4a31d6 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -260,6 +260,9 @@ class FilterCollection(object):
     def ephemeral_limit(self):
         return self._room_ephemeral_filter.limit()

+    def lazy_load_members(self):
+        return self._room_state_filter.lazy_load_members()
+
     def filter_presence(self, events):
         return self._presence_filter.filter(events)

@@ -416,6 +419,9 @@ class Filter(object):
     def limit(self):
         return self.filter_json.get("limit", 10)

+    def lazy_load_members(self):
+        return self.filter_json.get("lazy_load_members", False)
+

 def _matches_wildcard(actual_value, filter_value):
     if filter_value.endswith("*"):

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 76f5057377..f521d22e91 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -442,7 +442,7 @@ class SyncHandler(object):
     @defer.inlineCallbacks
     def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token,
-                            full_state, filter_members):
+                            full_state):
         """ Works out the difference in state between the start of the timeline
         and the previous sync.

@@ -455,7 +455,7 @@
             be None.
             now_token(str): Token of the end of the current batch.
             full_state(bool): Whether to force returning the full state.
-            filter_members(bool): Whether to only return state for members
+            lazy_load_members(bool): Whether to only return state for members
                 referenced in this timeline segment

@@ -470,8 +470,9 @@

             types = None
             member_state_ids = {}
+            lazy_load_members = sync_config.filter_collection.lazy_load_members()

-            if filter_members:
+            if lazy_load_members:
                 # We only request state for the members needed to display the
                 # timeline:

@@ -490,7 +491,7 @@
             if not types:
                 # an optimisation to stop needlessly trying to calculate
                 # member_state_ids
-                filter_members = False
+                lazy_load_members = False

             types.append((None, None))  # don't just filter to room members

@@ -511,7 +512,7 @@
             state_ids = current_state_ids

-            if filter_members:
+            if lazy_load_members:
                 member_state_ids = {
                     t: state_ids[t]
                     for t in state_ids if t[0] == EventTypes.Member

@@ -542,7 +543,7 @@
                 batch.events[0].event_id, types=types
             )

-            if filter_members:
+            if lazy_load_members:
                 member_state_ids = {
                     t: state_at_timeline_start[t]
                     for t in state_at_timeline_start if t[0] == EventTypes.Member

@@ -562,7 +563,7 @@
             )
         else:
             state_ids = {}
-            if filter_members:
+            if lazy_load_members:
                 # TODO: filter out redundant members based on their mxids (not their
                 # event_ids) at this point. We know we can do it based on mxid as this
                 # is a non-gappy incremental sync.
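
This commit reads the flag from the room state filter (via `self._room_state_filter.lazy_load_members()` above), so a client opts in through the `room.state` section of its sync filter. A sketch of such a filter as a Python dict follows; `lazy_load_members` is the only field this commit actually consumes, and the other values are illustrative:

# uploaded via POST /_matrix/client/r0/user/{userId}/filter, or passed
# URL-encoded in the `filter` query parameter of /sync
sync_filter = {
    "room": {
        "state": {
            "lazy_load_members": True,  # the flag added above
        },
        "timeline": {
            "limit": 10,
        },
    },
}

Defaulting the flag to False keeps existing clients on the old behaviour of receiving full room state.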
@@ -1380,8 +1381,7 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, - full_state=full_state, filter_members=True + room_id, batch, sync_config, since_token, now_token, full_state=full_state ) if room_builder.rtype == "joined": -- cgit 1.4.1 From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 01:09:55 +0100 Subject: add pydoc --- synapse/handlers/sync.py | 18 ++++++++---- synapse/storage/state.py | 76 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 27 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 05bf6d46dd..8e38078332 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -423,7 +423,11 @@ class SyncHandler(object): Args: event(synapse.events.EventBase): event of interest - + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) """ @@ -440,6 +444,11 @@ class SyncHandler(object): Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) @@ -472,8 +481,6 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - lazy_load_members(bool): Whether to only return state for members - referenced in this timeline segment Returns: A deferred new event dictionary @@ -496,7 +503,7 @@ class SyncHandler(object): types = [ (EventTypes.Member, state_key) for state_key in set( - event.sender # FIXME: we also care about targets etc. + event.sender # FIXME: we also care about invite targets etc. for event in batch.events ) ] @@ -1398,7 +1405,8 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state + room_id, batch, sync_config, since_token, now_token, + full_state=full_state ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55159e64d0..63b6834202 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> (dict of (type, state_key) -> event id) + """Returns the state groups for a given set of groups, filtering on + types of state events. + + Args: + groups(list[int]): list of state group IDs to query + types(list[str|None, str|None])|None: List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. 
If None, + all types are returned. + + Returns: + dictionary state_group -> (dict of (type, state_key) -> event id) """ results = {} @@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: type_set = set(types) if (None, None) in type_set: + # special case (None, None) to mean that other types should be + # returned - i.e. we were just filtering down the state keys + # for particular types. include_other_types = True type_set.remove((None, None)) types = list(type_set) # deduplicate types list @@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore): that are in the `types` list. Args: - event_ids (list) - types (list): List of (type, state_key) tuples which are used to - filter the state fetched. `state_key` may be None, which matches - any `state_key` + event_ids (list[string]) + types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_ids(list(str)): events whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore): missing state. Args: - group: The state group to lookup - types (list): List of 2-tuples of the form (`type`, `state_key`), - where a `state_key` of `None` matches all state_keys for the - `type`. 
Presence of type of `None` indicates that types not - in the list should not be filtered out. + group(int): The state group to lookup + types(list[str|None, str|None]): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + with matching types. + + Args: + groups(list[int]): list of groups whose state to query + types(list[str|None, str|None]|None): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all events are returned. + + Returns: + dict of group -> list of state events """ if types: types = frozenset(types) -- cgit 1.4.1 From 5f6122fe102f994e023d530cb6076730f31f619f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Jun 2018 00:08:52 +0300 Subject: more comments --- synapse/handlers/sync.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8e38078332..7ab97b24a6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -515,6 +515,9 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids + # + # XXX: i can't remember what this trying to do. why would + # types ever be []? --matthew lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -568,6 +571,10 @@ class SyncHandler(object): ) if lazy_load_members: + # TODO: filter out redundant members based on their event_ids + # (not mxids) at this point. In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. 
member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member -- cgit 1.4.1 From 94700e55fa93653d678fb5e27322fde4c0a15f3d Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 13 Jun 2018 10:31:01 +0100 Subject: if inviter_display_name == ""||None then default to inviter MXID to prevent email invite from "None" --- synapse/handlers/room_member.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index b3f979b246..1b8dfa8254 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -565,6 +565,10 @@ class RoomMemberHandler(BaseHandler): inviter_display_name = member_event.content.get("displayname", "") inviter_avatar_url = member_event.content.get("avatar_url", "") + # if user has no display name, default to their MXID + if not inviter_display_name: + inviter_display_name = user.to_string() + canonical_room_alias = "" canonical_alias_event = room_state.get((EventTypes.CanonicalAlias, "")) if canonical_alias_event: -- cgit 1.4.1 From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 18:32:02 +0100 Subject: add a filtered_types param to limit filtering to specific types --- synapse/handlers/sync.py | 65 +++++++++++++++------------ synapse/storage/state.py | 113 +++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 82 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0c21ac2c77..cb711b8758 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -417,38 +417,44 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event, types=None): + def get_state_after_event(self, event, types=None, filtered_types=None): """ Get the room state after the given event Args: event(synapse.events.EventBase): event of interest - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. 
+ Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) + state_ids = yield self.store.get_state_ids_for_event( + event.event_id, types, filtered_types=filtered_types + ) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position, types=None): + def get_state_at(self, room_id, stream_position, types=None, filtered_types=None): """ Get the room state at a particular stream position Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. - May be None, which matches any key. + all events are returned of the given type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A Deferred map from ((type, state_key)->Event) @@ -463,7 +469,9 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event, types) + state = yield self.get_state_after_event( + last_event, types, filtered_types=filtered_types + ) else: # no events in this room - so presumably no state @@ -499,6 +507,7 @@ class SyncHandler(object): types = None member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() + filtered_types = None if lazy_load_members: # We only request state for the members needed to display the @@ -516,29 +525,25 @@ class SyncHandler(object): # to be done based on event_id, and we don't have the member # event ids until we've pulled them out of the DB. - if not types: - # an optimisation to stop needlessly trying to calculate - # member_state_ids - # - # XXX: i can't remember what this trying to do. why would - # types ever be []? 
--matthew - lazy_load_members = False - - types.append((None, None)) # don't just filter to room members + # only apply the filtering to room members + filtered_types = [EventTypes.Member] if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token, types=types + room_id, stream_position=now_token, types=types, + filtered_types=filtered_types ) state_ids = current_state_ids @@ -563,15 +568,18 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token, types=types + room_id, stream_position=since_token, types=types, + filtered_types=filtered_types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) if lazy_load_members: @@ -603,11 +611,10 @@ class SyncHandler(object): # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. - # strip off the (None, None) and filter to just room members - types = types[:-1] if types: state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5ff44fef7..ee531a2ce0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, filtered_types=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore): groups(list[int]): list of state group IDs to query types(list[str|None, str|None])|None: List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. If None, - all types are returned. + state_keys for the `type`. If None, all types are returned. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. 
Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, filtered_types ) results.update(res) defer.returnValue(results) - def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + def _get_state_groups_from_groups_txn( + self, txn, groups, types=None, filtered_types=None + ): results = {group: {} for group in groups} - include_other_types = False + include_other_types = False if filtered_types is None else True if types is not None: - type_set = set(types) - if (None, None) in type_set: - # special case (None, None) to mean that other types should be - # returned - i.e. we were just filtering down the state keys - # for particular types. - include_other_types = True - type_set.remove((None, None)) - types = list(type_set) # deduplicate types list + types = list(set(types)) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) @@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types): + def get_state_for_events(self, event_ids, types, filtered_types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: event_ids (list[string]) - types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + types (list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: deferred: A list of dicts corresponding to the event_ids given. 
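
The `filtered_types` parameter introduced in this commit replaces the earlier `(None, None)` sentinel: `types` now only constrains event types that are listed in `filtered_types`, and events of any other type pass through unfiltered. As a rough restatement of the membership test implied by the docstrings above (a sketch; `include` is a hypothetical helper, not code from the patch):

def include(typ, state_key, types, filtered_types):
    if types is None:
        return True  # no filtering at all
    if filtered_types is not None and typ not in filtered_types:
        return True  # this type is outside the scope of `types`
    # otherwise (typ, state_key) must match an entry in `types`;
    # a state_key of None is a wildcard for that type
    return any(
        t == typ and (sk is None or sk == state_key)
        for t, sk in types
    )

With `types=[("m.room.member", "@alice:example.org")]` and `filtered_types=["m.room.member"]`, a topic event is always included, Alice's membership is included, and every other membership event is filtered out.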
@@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) state_event_map = yield self.get_events( [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)], @@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_ids_for_events(self, event_ids, types=None): + def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None): """ Get the state dicts corresponding to a list of events Args: event_ids(list(str)): events whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) event_to_state = { event_id: group_to_state[group] @@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_for_event(self, event_id, types=None): + def get_state_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. 
Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_for_events([event_id], types) + state_map = yield self.get_state_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @defer.inlineCallbacks - def get_state_ids_for_event(self, event_id, types=None): + def get_state_ids_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_ids_for_events([event_id], types) + state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @cached(max_entries=50000) @@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types): + def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). @@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore): Args: group(int): The state group to lookup - types(list[str|None, str|None]): List of 2-tuples of the form + types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. + state_keys for the `type`. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} + + # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False + include_other_types = True if filtered_types is None else False for typ, state_key in types: key = (typ, state_key) - if typ is None: - include_other_types = True - next - if state_key is None: type_to_key[typ] = None # XXX: why do we mark the type as missing from our cache just # because we weren't filtering on a specific value of state_key? + # is it because the cache doesn't handle wildcards? 
missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types + return include_other_types and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore): return state_dict_ids, is_all @defer.inlineCallbacks - def _get_state_for_groups(self, groups, types=None): + def _get_state_for_groups(self, groups, types=None, filtered_types=None): """Gets the state at each of a list of state groups, optionally filtering by type/state_key Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None|str, None|str)]): + types (None|iterable[(None, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. Otherwise, each entry should be a `(type, state_key)` tuple to include in the response. A `state_key` of None is a wildcard - meaning that we require all state with that type. A `type` of None - indicates that types not in the list should not be filtered out. + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] @@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types, + group, types, filtered_types ) results[group] = state_dict_ids @@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, + missing_groups, types_to_fetch, filtered_types ) for group, group_state_dict in iteritems(group_to_state_dict): -- cgit 1.4.1 From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:03:50 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 44 +++++++++++++++++++++++++++----------------- synapse/storage/state.py | 7 ++++--- 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cb711b8758..b597f94cf6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -435,7 +435,7 @@ class SyncHandler(object): A Deferred map from ((type, state_key)->Event) """ state_ids = yield self.store.get_state_ids_for_event( - event.event_id, types, filtered_types=filtered_types + event.event_id, types, filtered_types=filtered_types, ) if event.is_state(): state_ids = state_ids.copy() @@ -470,7 +470,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] state = yield self.get_state_after_event( - last_event, types, filtered_types=filtered_types + last_event, types, filtered_types=filtered_types, ) else: @@ -505,7 +505,6 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None @@ -521,10 +520,6 @@ class SyncHandler(object): ) ] - # We can't remove redundant 
member types at this stage as it has - # to be done based on event_id, and we don't have the member - # event ids until we've pulled them out of the DB. - # only apply the filtering to room members filtered_types = [EventTypes.Member] @@ -532,27 +527,32 @@ class SyncHandler(object): if batch: current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = current_state_ids + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -569,28 +569,38 @@ class SyncHandler(object): elif batch.limited: state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_at_timeline_start = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: - # TODO: filter out redundant members based on their event_ids - # (not mxids) at this point. In practice, limited syncs are + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implement, this should filter using event_ids (not mxids). + # In practice, limited syncs are # relatively rare so it's not a total disaster to send redundant - # members down at this point. + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -614,7 +624,7 @@ class SyncHandler(object): if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee531a2ce0..75c6366e7a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore): if state_key is None: type_to_key[typ] = None - # XXX: why do we mark the type as missing from our cache just - # because we weren't filtering on a specific value of state_key? - # is it because the cache doesn't handle wildcards? 
+ # we mark the type as missing from the cache because + # when the cache was populated it might have been done with a + # restricted set of state_keys, so the wildcard will not work + # and the cache may be incomplete. missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: -- cgit 1.4.1 From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 19:21:20 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 67 +++++++++++++++++++----------------------------- synapse/storage/state.py | 20 ++++++--------- 2 files changed, 35 insertions(+), 52 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. 
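# A rough sketch of the set arithmetic the reworked _calculate_state()
# performs (names mirror the implementation further down in this patch;
# each input dict maps (type, state_key) -> event_id):
#
#   c_ids  = ids in `current`            state at the end of the timeline
#   ts_ids = ids in `timeline_start`     state at the start of the timeline
#   p_ids  = ids in `previous`           state at the previous sync (or {})
#   tc_ids = ids in `timeline_contains`  state events inside the timeline
#
#   state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids
#
# where ll_ids is empty unless lazy_load_members is set, in which case it
# holds the membership events of the timeline's senders (taken from
# timeline_start) that aren't already in the timeline itself.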
+ state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. 
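# Illustrative example of how `types` and `filtered_types` combine
# (hypothetical values): with
#
#   types          = [("m.room.member", "@alice:example.com")]
#   filtered_types = ["m.room.member"]
#
# only alice's membership event survives the member filter, while state of
# every type not named in filtered_types (topic, name, power levels, ...)
# is returned unfiltered. With filtered_types=None, the same `types` list
# would instead strip the result down to alice's membership alone.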
@@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. -- cgit 1.4.1 From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 12:39:40 +0100 Subject: incorporate more review --- synapse/handlers/sync.py | 12 +++++++++--- synapse/storage/state.py | 36 +++++++++--------------------------- tests/storage/test_state.py | 9 +++++++++ 3 files changed, 27 insertions(+), 30 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5689ad2f58..e5a2329d73 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1526,6 +1526,9 @@ def _calculate_state( previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline + lazy_load_members (bool): whether to return members from timeline_start + or not. assumes that timeline_start has already been filtered to + include only the members the client needs to know about. Returns: dict @@ -1545,9 +1548,12 @@ def _calculate_state( p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - # track the membership events in the state as of the start of the timeline - # so we can add them back in to the state if we're lazyloading. 
We don't - # add them into state if they're already contained in the timeline. + # If we are lazyloading room members, we explicitly add the membership events + # for the senders in the timeline into the state block returned by /sync, + # as we may not have sent them to the client before. We find these membership + # events by filtering them out of timeline_start, which has already been filtered + # to only include membership events for the senders in the timeline. + if lazy_load_members: ll_ids = set( e for t, e in timeline_start.iteritems() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f99d3871e4..1413a6f910 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types, filtered_types=None): + def _get_state_groups_from_groups(self, groups, types): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(Iterable[str]|None): Only apply filtering via `types` to this - list of event types. Other types of events are returned unfiltered. - If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types, + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None, + self, txn, groups, types=None, ): results = {group: {} for group in groups} @@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore): ) for etype, state_key in types ] - - if filtered_types is not None: - # XXX: check whether this slows postgres down like a list of - # ORs does too? - unique_types = set(filtered_types) - clause_to_args.append( - ( - "AND type <> ? " * len(unique_types), - list(unique_types) - ) - ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if filtered_types is not None: - unique_types = set(filtered_types) - where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" - ) - where_args.extend(list(unique_types)) - where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore): # cache. Hence, if we are doing a wildcard lookup, populate the # cache fully so that we can do an efficient lookup next time. 
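# Put differently (sketch of the intent of the change below): a request
# with filtered_types is an implicit wildcard over the non-filtered types,
# so, as with an explicit (type, None) entry, the only safe way to prime
# the cache for the missing groups is to fetch their full state.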
- if types and any(k is None for (t, k) in types): + if filtered_types or (types and any(k is None for (t, k) in types)): types_to_fetch = None else: types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, filtered_types + missing_groups, types_to_fetch ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore): if types: for k, v in iteritems(group_state_dict): (typ, _) = k - if k in types or (typ, None) in types: + if ( + (k in types or (typ, None) in types) or + (filtered_types and typ not in filtered_types) + ): state_dict[k] = v else: state_dict.update(group_state_dict) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8924ba9f7f..b2f314e9db 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, (e3.type, e3.state_key): e3, }, state) + + state = yield self.store.get_state_for_event( + e5.event_id, [], filtered_types=[EventTypes.Member], + ) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + }, state) -- cgit 1.4.1 From eb1d911ab743e85154f7c4b2db8a954d152020dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:40:49 +0100 Subject: rather than adding ll_ids, remove them from p_ids --- synapse/handlers/sync.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e5a2329d73..1422843af8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,16 +1553,17 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. + # In practice, we can do this by removing them from the p_ids list. + # see https://github.com/matrix-org/synapse/pull/2970 + # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - ll_ids = set( + p_ids.difference_update( e for t, e in timeline_start.iteritems() - if t[0] == EventTypes.Member and e not in tc_ids + if t[0] == EventTypes.Member ) - else: - ll_ids = set() - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids + state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids return { event_id_to_key[e]: e for e in state_ids -- cgit 1.4.1 From 1a01a5b964d3ea373355684a91b9f7fd95726fbc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 14:03:15 +0100 Subject: clarify comment on p_ids --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1422843af8..4ced3144c8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,7 +1553,8 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list. + # In practice, we can do this by removing them from the p_ids list, + # which is the list of relevant state we know we have already sent to the client. 
# see https://github.com/matrix-org/synapse/pull/2970 # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 -- cgit 1.4.1 From d8e65ed7e111243c08c0b87c9a49e7537c355074 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 25 Jul 2018 15:44:41 -0600 Subject: Fix a minor documentation typo in on_make_leave --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 145c1a21d4..49068c06d9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1279,7 +1279,7 @@ class FederationHandler(BaseHandler): @log_function def on_make_leave_request(self, room_id, user_id): """ We've received a /make_leave/ request, so we create a partial - join event for the room and return that. We do *not* persist or + leave event for the room and return that. We do *not* persist or process it until the other server has signed it and sent it back. """ builder = self.event_builder_factory.new({ -- cgit 1.4.1 From 03751a64203b169cbf33b636b6d940ca6d414c31 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 11:44:26 +0100 Subject: Fix some looping_call calls which were broken in #3604 It turns out that looping_call does check the deferred returned by its callback, and (at least in the case of client_ips), we were relying on this, and I broke it in #3604. Update run_as_background_process to return the deferred, and make sure we return it to clock.looping_call. --- changelog.d/3610.feature | 1 + synapse/app/homeserver.py | 4 ++-- synapse/groups/attestations.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/metrics/background_process_metrics.py | 10 ++++++++-- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/client_ips.py | 2 +- synapse/storage/devices.py | 2 +- synapse/storage/event_federation.py | 2 +- synapse/storage/event_push_actions.py | 4 ++-- synapse/storage/transactions.py | 4 +++- synapse/util/caches/expiringcache.py | 2 +- 13 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 changelog.d/3610.feature (limited to 'synapse/handlers') diff --git a/changelog.d/3610.feature b/changelog.d/3610.feature new file mode 100644 index 0000000000..77a294cb9f --- /dev/null +++ b/changelog.d/3610.feature @@ -0,0 +1 @@ +Add metrics to track resource usage by background processes diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b7e7718290..57b815d777 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -429,7 +429,7 @@ def run(hs): stats_process = [] def start_phone_stats_home(): - run_as_background_process("phone_stats_home", phone_stats_home) + return run_as_background_process("phone_stats_home", phone_stats_home) @defer.inlineCallbacks def phone_stats_home(): @@ -502,7 +502,7 @@ def run(hs): ) def generate_user_daily_visit_stats(): - run_as_background_process( + return run_as_background_process( "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits, ) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 4216af0a27..b04f4234ca 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -153,7 +153,7 @@ class GroupAttestionRenewer(object): defer.returnValue({}) def _start_renew_attestations(self): - run_as_background_process("renew_attestations", self._renew_attestations) + return 
run_as_background_process("renew_attestations", self._renew_attestations) @defer.inlineCallbacks def _renew_attestations(self): diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 43692b83a8..cb5c6d587e 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -256,7 +256,7 @@ class ProfileHandler(BaseHandler): ) def _start_update_remote_profile_cache(self): - run_as_background_process( + return run_as_background_process( "Update remote profile", self._update_remote_profile_cache, ) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 9d820e44a6..ce678d5f75 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -151,13 +151,19 @@ def run_as_background_process(desc, func, *args, **kwargs): This should be used to wrap processes which are fired off to run in the background, instead of being associated with a particular request. + It returns a Deferred which completes when the function completes, but it doesn't + follow the synapse logcontext rules, which makes it appropriate for passing to + clock.looping_call and friends (or for firing-and-forgetting in the middle of a + normal synapse inlineCallbacks function). + Args: desc (str): a description for this background process type func: a function, which may return a Deferred args: positional args for func kwargs: keyword args for func - Returns: None + Returns: Deferred which returns the result of func, but note that it does not + follow the synapse logcontext rules. """ @defer.inlineCallbacks def run(): @@ -176,4 +182,4 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes[desc].remove(proc) with PreserveLoggingContext(): - run() + return run() diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5b13378caa..174ad20123 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -106,7 +106,7 @@ class MediaRepository(object): ) def _start_update_recently_accessed(self): - run_as_background_process( + return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed, ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4efd5339a4..27aa0def2f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -373,7 +373,7 @@ class PreviewUrlResource(Resource): }) def _start_expire_url_cache_data(self): - run_as_background_process( + return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data, ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 77ae10da3d..b8cefd43d6 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -102,7 +102,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): to_update, ) - run_as_background_process( + return run_as_background_process( "update_client_ips", update, ) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 52dccb1507..c0943ecf91 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -712,7 +712,7 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - run_as_background_process( + return run_as_background_process( "prune_old_outbound_device_pokes", self.runInteraction, 
"_prune_old_outbound_device_pokes", diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..f269ec6fb3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -549,7 +549,7 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - run_as_background_process( + return run_as_background_process( "delete_old_forward_extrem_cache", self.runInteraction, "_delete_old_forward_extrem_cache", diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 4f44b0ad47..6840320641 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -460,7 +460,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): ) def _find_stream_orderings_for_times(self): - run_as_background_process( + return run_as_background_process( "event_push_action_stream_orderings", self.runInteraction, "_find_stream_orderings_for_times", @@ -790,7 +790,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): """, (room_id, user_id, stream_ordering)) def _start_rotate_notifs(self): - run_as_background_process("rotate_notifs", self._rotate_notifs) + return run_as_background_process("rotate_notifs", self._rotate_notifs) @defer.inlineCallbacks def _rotate_notifs(self): diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b4b479d94c..428e7fa36e 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -273,7 +273,9 @@ class TransactionStore(SQLBaseStore): return self.cursor_to_dict(txn) def _start_cleanup_transactions(self): - run_as_background_process("cleanup_transactions", self._cleanup_transactions) + return run_as_background_process( + "cleanup_transactions", self._cleanup_transactions, + ) def _cleanup_transactions(self): now = self._clock.time_msec() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 465adc54a8..ce85b2ae11 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -64,7 +64,7 @@ class ExpiringCache(object): return def f(): - run_as_background_process( + return run_as_background_process( "prune_cache_%s" % self._cache_name, self._prune_cache, ) -- cgit 1.4.1 From a75231b507e025eaaa4f06d8932c04fa4e942d48 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 26 Jul 2018 22:51:30 +0100 Subject: Deduplicate redundant lazy-loaded members (#3331) * attempt at deduplicating lazy-loaded members as per the proposal; we can deduplicate redundant lazy-loaded members which are sent in the same sync sequence. we do this heuristically rather than requiring the client to somehow tell us which members it has chosen to cache, by instead caching the last N members sent to a client, and not sending them again. For now we hardcode N to 100. Each cache for a given (user,device) tuple is in turn cached for up to X minutes (to avoid the caches building up). For now we hardcode X to 30. 
* add include_redundant_members filter option & make it work * remove stale todo * add tests for _get_some_state_from_cache * incorporate review --- changelog.d/3331.feature | 1 + synapse/api/filtering.py | 9 +++++ synapse/handlers/sync.py | 87 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 changelog.d/3331.feature (limited to 'synapse/handlers') diff --git a/changelog.d/3331.feature b/changelog.d/3331.feature new file mode 100644 index 0000000000..e574b9bcc3 --- /dev/null +++ b/changelog.d/3331.feature @@ -0,0 +1 @@ +add support for the include_redundant_members filter param as per MSC1227 diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7e767b9bf5..186831e118 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -117,6 +117,9 @@ ROOM_EVENT_FILTER_SCHEMA = { "lazy_load_members": { "type": "boolean" }, + "include_redundant_members": { + "type": "boolean" + }, } } @@ -267,6 +270,9 @@ class FilterCollection(object): def lazy_load_members(self): return self._room_state_filter.lazy_load_members() + def include_redundant_members(self): + return self._room_state_filter.include_redundant_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -426,6 +432,9 @@ class Filter(object): def lazy_load_members(self): return self.filter_json.get("lazy_load_members", False) + def include_redundant_members(self): + return self.filter_json.get("include_redundant_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4ced3144c8..dff1f67dcb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -26,6 +26,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.push.clientformat import format_push_rules_for_user from synapse.types import RoomStreamToken from synapse.util.async import concurrently_execute +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure, measure_func @@ -33,6 +35,14 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Store the cache that tracks which lazy-loaded members have been sent to a given +# client for no more than 30 minutes. +LAZY_LOADED_MEMBERS_CACHE_MAX_AGE = 30 * 60 * 1000 + +# Remember the last 100 members we sent to a client for the purposes of +# avoiding redundantly sending the same lazy-loaded members to the client +LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100 + SyncConfig = collections.namedtuple("SyncConfig", [ "user", @@ -182,6 +192,12 @@ class SyncHandler(object): self.response_cache = ResponseCache(hs, "sync") self.state = hs.get_state_handler() + # ExpiringCache((User, Device)) -> LruCache(state_key => event_id) + self.lazy_loaded_members_cache = ExpiringCache( + "lazy_loaded_members_cache", self.clock, + max_len=0, expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, + ) + def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0, full_state=False): """Get the sync for a client if we have new data for it now. 
Otherwise @@ -505,9 +521,13 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None + lazy_load_members = sync_config.filter_collection.lazy_load_members() + include_redundant_members = ( + sync_config.filter_collection.include_redundant_members() + ) + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -523,6 +543,11 @@ class SyncHandler(object): # only apply the filtering to room members filtered_types = [EventTypes.Member] + timeline_state = { + (event.type, event.state_key): event.event_id + for event in batch.events if event.is_state() + } + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -543,11 +568,6 @@ class SyncHandler(object): state_ids = current_state_ids - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, @@ -571,21 +591,6 @@ class SyncHandler(object): filtered_types=filtered_types, ) - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implemented, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, @@ -596,16 +601,48 @@ class SyncHandler(object): else: state_ids = {} if lazy_load_members: - # TODO: filter out redundant members based on their mxids (not their - # event_ids) at this point. We know we can do it based on mxid as this - # is an non-gappy incremental sync. - if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, filtered_types=filtered_types, ) + if lazy_load_members and not include_redundant_members: + cache_key = (sync_config.user.to_string(), sync_config.device_id) + cache = self.lazy_loaded_members_cache.get(cache_key) + if cache is None: + logger.debug("creating LruCache for %r", cache_key) + cache = LruCache(LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE) + self.lazy_loaded_members_cache[cache_key] = cache + else: + logger.debug("found LruCache for %r", cache_key) + + # if it's a new sync sequence, then assume the client has had + # amnesia and doesn't want any recent lazy-loaded members + # de-duplicated. 
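# Worked example of the intended behaviour (illustrative): an initial
# /sync (since_token is None) clears this device's LruCache and sends every
# lazily-loaded member; on subsequent incremental syncs, a member whose
# event_id is already cached is dropped from `state`, so a client only sees
# a membership event again once it changes or falls out of the
# 100-entry-per-device cache.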
+ if since_token is None: + logger.debug("clearing LruCache for %r", cache_key) + cache.clear() + else: + # only send members which aren't in our LruCache (either + # because they're new to this client or have been pushed out + # of the cache) + logger.debug("filtering state from %r...", state_ids) + state_ids = { + t: event_id + for t, event_id in state_ids.iteritems() + if cache.get(t[1]) != event_id + } + logger.debug("...to %r", state_ids) + + # add any member IDs we are about to send into our LruCache + for t, event_id in itertools.chain( + state_ids.items(), + timeline_state.items(), + ): + if t[0] == EventTypes.Member: + cache.set(t[1], event_id) + state = {} if state_ids: state = yield self.store.get_events(list(state_ids.values())) -- cgit 1.4.1 From e9b2d047f68b74e231609ce40978f4452ac9e22f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 27 Jul 2018 15:12:50 +0100 Subject: make /context lazyload & filter aware (#3567) make /context lazyload & filter aware. --- changelog.d/3567.feature | 1 + synapse/handlers/room.py | 24 +++++++++++++++++++++--- synapse/handlers/search.py | 2 +- synapse/rest/client/v1/room.py | 9 +++++++++ synapse/storage/stream.py | 14 +++++++++++--- 5 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 changelog.d/3567.feature (limited to 'synapse/handlers') diff --git a/changelog.d/3567.feature b/changelog.d/3567.feature new file mode 100644 index 0000000000..c74c1f57a9 --- /dev/null +++ b/changelog.d/3567.feature @@ -0,0 +1 @@ +make the /context API filter & lazy-load aware as per MSC1227 diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 003b848c00..7b7804d9b2 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -15,6 +15,7 @@ # limitations under the License. """Contains functions for performing events on rooms.""" +import itertools import logging import math import string @@ -401,7 +402,7 @@ class RoomContextHandler(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_event_context(self, user, room_id, event_id, limit): + def get_event_context(self, user, room_id, event_id, limit, event_filter): """Retrieves events, pagination tokens and state around a given event in a room. @@ -411,6 +412,8 @@ class RoomContextHandler(object): event_id (str) limit (int): The maximum number of events to return in total (excluding state). + event_filter (Filter|None): the filter to apply to the events returned + (excluding the target event_id) Returns: dict, or None if the event isn't found @@ -443,7 +446,7 @@ class RoomContextHandler(object): ) results = yield self.store.get_events_around( - room_id, event_id, before_limit, after_limit + room_id, event_id, before_limit, after_limit, event_filter ) results["events_before"] = yield filter_evts(results["events_before"]) @@ -455,8 +458,23 @@ class RoomContextHandler(object): else: last_event_id = event_id + types = None + filtered_types = None + if event_filter and event_filter.lazy_load_members(): + members = set(ev.sender for ev in itertools.chain( + results["events_before"], + (results["event"],), + results["events_after"], + )) + filtered_types = [EventTypes.Member] + types = [(EventTypes.Member, member) for member in members] + + # XXX: why do we return the state as of the last event rather than the + # first? Shouldn't we be consistent with /sync? 
+ # https://github.com/matrix-org/matrix-doc/issues/687 + state = yield self.store.get_state_for_events( - [last_event_id], None + [last_event_id], types, filtered_types=filtered_types, ) results["state"] = list(state[last_event_id].values()) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 69ae9731d5..c464adbd0b 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -287,7 +287,7 @@ class SearchHandler(BaseHandler): contexts = {} for event in allowed_events: res = yield self.store.get_events_around( - event.room_id, event.event_id, before_limit, after_limit + event.room_id, event.event_id, before_limit, after_limit, ) logger.info( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index b7bd878c90..13c331550b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -531,11 +531,20 @@ class RoomEventContextServlet(ClientV1RestServlet): limit = parse_integer(request, "limit", default=10) + # picking the API shape for symmetry with /messages + filter_bytes = parse_string(request, "filter") + if filter_bytes: + filter_json = urlparse.unquote(filter_bytes).decode("UTF-8") + event_filter = Filter(json.loads(filter_json)) + else: + event_filter = None + results = yield self.room_context_handler.get_event_context( requester.user, room_id, event_id, limit, + event_filter, ) if not results: diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 66856342f0..25d0097b58 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -527,7 +527,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) @defer.inlineCallbacks - def get_events_around(self, room_id, event_id, before_limit, after_limit): + def get_events_around( + self, room_id, event_id, before_limit, after_limit, event_filter=None, + ): """Retrieve events and pagination tokens around a given event in a room. @@ -536,6 +538,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id (str) before_limit (int) after_limit (int) + event_filter (Filter|None) Returns: dict @@ -543,7 +546,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): results = yield self.runInteraction( "get_events_around", self._get_events_around_txn, - room_id, event_id, before_limit, after_limit + room_id, event_id, before_limit, after_limit, event_filter, ) events_before = yield self._get_events( @@ -563,7 +566,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "end": results["after"]["token"], }) - def _get_events_around_txn(self, txn, room_id, event_id, before_limit, after_limit): + def _get_events_around_txn( + self, txn, room_id, event_id, before_limit, after_limit, event_filter, + ): """Retrieves event_ids and pagination tokens around a given event in a room. @@ -572,6 +577,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id (str) before_limit (int) after_limit (int) + event_filter (Filter|None) Returns: dict @@ -601,11 +607,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): rows, start_token = self._paginate_room_events_txn( txn, room_id, before_token, direction='b', limit=before_limit, + event_filter=event_filter, ) events_before = [r.event_id for r in rows] rows, end_token = self._paginate_room_events_txn( txn, room_id, after_token, direction='f', limit=after_limit, + event_filter=event_filter, ) events_after = [r.event_id for r in rows] -- cgit 1.4.1
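As a rough end-to-end illustration of what this series enables: a client opts in to lazy-loaded members by passing a room state filter on /sync. The sketch below is illustrative only — the homeserver URL, access token and the use of the `requests` library are placeholder assumptions, and it presumes a server running with these patches.

import json

import requests

HS = "https://homeserver.example"       # placeholder homeserver URL
TOKEN = "syt_placeholder_access_token"  # placeholder access token

# MSC1227: only fetch the m.room.member events needed to render each
# timeline, and let the server suppress members it has recently sent us.
sync_filter = {
    "room": {
        "state": {
            "lazy_load_members": True,
            "include_redundant_members": False,
        },
    },
}

resp = requests.get(
    HS + "/_matrix/client/r0/sync",
    params={"filter": json.dumps(sync_filter)},
    headers={"Authorization": "Bearer " + TOKEN},
)
resp.raise_for_status()

# With lazy loading enabled, each joined room's `state` block contains only
# the membership events for senders who appear in that room's timeline.
for room_id, room in resp.json().get("rooms", {}).get("join", {}).items():
    members = [
        ev["state_key"]
        for ev in room["state"]["events"]
        if ev["type"] == "m.room.member"
    ]
    print(room_id, "lazy-loaded members:", members)

The final patch above extends the same idea to /context: the identical room event filter, URL-encoded into a `filter` query parameter on /rooms/{roomId}/context/{eventId}, lazy-loads the members for the events around the target event.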