From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Sun, 11 Mar 2018 20:01:41 +0000
Subject: WIP experiment in lazyloading room members

---
 synapse/handlers/sync.py | 43 +++++++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 12 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0f713ce038..809e9fece9 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -399,7 +399,7 @@ class SyncHandler(object):
         ))
 
     @defer.inlineCallbacks
-    def get_state_after_event(self, event):
+    def get_state_after_event(self, event, types=None):
         """
         Get the room state after the given event
 
@@ -409,14 +409,14 @@ class SyncHandler(object):
         Returns:
             A Deferred map from ((type, state_key)->Event)
         """
-        state_ids = yield self.store.get_state_ids_for_event(event.event_id)
+        state_ids = yield self.store.get_state_ids_for_event(event.event_id, types)
         if event.is_state():
             state_ids = state_ids.copy()
             state_ids[(event.type, event.state_key)] = event.event_id
         defer.returnValue(state_ids)
 
     @defer.inlineCallbacks
-    def get_state_at(self, room_id, stream_position):
+    def get_state_at(self, room_id, stream_position, types=None):
         """ Get the room state at a particular stream position
 
         Args:
@@ -432,7 +432,7 @@ class SyncHandler(object):
 
         if last_events:
             last_event = last_events[-1]
-            state = yield self.get_state_after_event(last_event)
+            state = yield self.get_state_after_event(last_event, types)
 
         else:
             # no events in this room - so presumably no state
@@ -441,7 +441,7 @@ class SyncHandler(object):
 
     @defer.inlineCallbacks
     def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token,
-                            full_state):
+                            full_state, filter_members):
         """ Works out the differnce in state between the start of the timeline
         and the previous sync.
 
@@ -454,6 +454,8 @@ class SyncHandler(object):
                 be None.
             now_token(str): Token of the end of the current batch.
             full_state(bool): Whether to force returning the full state.
+            filter_members(bool): Whether to only return state for members
+                referenced in this timeline segment
 
         Returns:
              A deferred new event dictionary
@@ -464,18 +466,35 @@ class SyncHandler(object):
         # TODO(mjark) Check for new redactions in the state events.
 
         with Measure(self.clock, "compute_state_delta"):
+
+            types = None
+            if filter_members:
+                # We only request state for the members needed to display the
+                # timeline:
+                types = (
+                    (EventTypes.Member, state_key)
+                    for state_key in set(
+                        event.sender  # FIXME: we also care about targets etc.
+                        for event in batch.events
+                    )
+                )
+                types.append((None, None))  # don't just filter to room members
+
+                # TODO: we should opportunistically deduplicate these members too
+                # within the same sync series (based on an in-memory cache)
+
             if full_state:
                 if batch:
                     current_state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[-1].event_id
+                        batch.events[-1].event_id, types=types
                     )
 
                     state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[0].event_id
+                        batch.events[0].event_id, types=types
                     )
                 else:
                     current_state_ids = yield self.get_state_at(
-                        room_id, stream_position=now_token
+                        room_id, stream_position=now_token, types=types
                     )
 
                     state_ids = current_state_ids
@@ -493,15 +512,15 @@ class SyncHandler(object):
                 )
             elif batch.limited:
                 state_at_previous_sync = yield self.get_state_at(
-                    room_id, stream_position=since_token
+                    room_id, stream_position=since_token, types=types
                 )
 
                 current_state_ids = yield self.store.get_state_ids_for_event(
-                    batch.events[-1].event_id
+                    batch.events[-1].event_id, types=types
                 )
 
                 state_at_timeline_start = yield self.store.get_state_ids_for_event(
-                    batch.events[0].event_id
+                    batch.events[0].event_id, types=types
                 )
 
                 timeline_state = {
@@ -1325,7 +1344,7 @@ class SyncHandler(object):
 
         state = yield self.compute_state_delta(
             room_id, batch, sync_config, since_token, now_token,
-            full_state=full_state
+            full_state=full_state, filter_members=True
         )
 
         if room_builder.rtype == "joined":
-- 
cgit 1.4.1


From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Sun, 11 Mar 2018 20:10:25 +0000
Subject: typos

---
 synapse/handlers/sync.py | 4 ++--
 synapse/storage/state.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 809e9fece9..fa730ca760 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -471,13 +471,13 @@ class SyncHandler(object):
             if filter_members:
                 # We only request state for the members needed to display the
                 # timeline:
-                types = (
+                types = [
                     (EventTypes.Member, state_key)
                     for state_key in set(
                         event.sender  # FIXME: we also care about targets etc.
                         for event in batch.events
                     )
-                )
+                ]
                 types.append((None, None))  # don't just filter to room members
 
                 # TODO: we should opportunistically deduplicate these members too
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index da6bb685fa..0238200286 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore):
                     args = [next_group]
                     if types:
                         args.extend(i for typ in types for i in typ)
+                        if include_other_types:
+                            args.extend(typ for (typ, _) in types)
 
                     txn.execute(
                         "SELECT type, state_key, event_id FROM state_groups_state"
-- 
cgit 1.4.1


From 14a9d2f73d50225f190f42e270cbf9ef7447bd8c Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 13 Mar 2018 22:03:42 +0000
Subject: ensure we always include the members for a given timeline block

---
 synapse/handlers/sync.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index fa730ca760..c754cfdeeb 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -468,6 +468,8 @@ class SyncHandler(object):
         with Measure(self.clock, "compute_state_delta"):
 
             types = None
+            member_state_ids = {}
+
             if filter_members:
                 # We only request state for the members needed to display the
                 # timeline:
@@ -492,6 +494,13 @@ class SyncHandler(object):
                     state_ids = yield self.store.get_state_ids_for_event(
                         batch.events[0].event_id, types=types
                     )
+
+                    if filter_members:
+                        member_state_ids = {
+                            t: state_ids[t]
+                            for t in state_ids if t[0] == EventTypes.member
+                        }
+
                 else:
                     current_state_ids = yield self.get_state_at(
                         room_id, stream_position=now_token, types=types
@@ -499,6 +508,12 @@ class SyncHandler(object):
 
                     state_ids = current_state_ids
 
+                    if filter_members:
+                        member_state_ids = {
+                            t: state_ids[t]
+                            for t in state_ids if t[0] == EventTypes.member
+                        }
+
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
                     for event in batch.events if event.is_state()
@@ -507,6 +522,7 @@ class SyncHandler(object):
                 state_ids = _calculate_state(
                     timeline_contains=timeline_state,
                     timeline_start=state_ids,
+                    timeline_start_members=member_state_ids,
                     previous={},
                     current=current_state_ids,
                 )
@@ -523,6 +539,12 @@ class SyncHandler(object):
                     batch.events[0].event_id, types=types
                 )
 
+                if filter_members:
+                    member_state_ids = {
+                        t: state_at_timeline_start[t]
+                        for t in state_ids if t[0] == EventTypes.member
+                    }
+
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
                     for event in batch.events if event.is_state()
@@ -531,6 +553,7 @@ class SyncHandler(object):
                 state_ids = _calculate_state(
                     timeline_contains=timeline_state,
                     timeline_start=state_at_timeline_start,
+                    timeline_start_members=member_state_ids,
                     previous=state_at_previous_sync,
                     current=current_state_ids,
                 )
@@ -1440,12 +1463,16 @@ def _action_has_highlight(actions):
     return False
 
 
-def _calculate_state(timeline_contains, timeline_start, previous, current):
+def _calculate_state(timeline_contains, timeline_start, timeline_start_members,
+                     previous, current):
     """Works out what state to include in a sync response.
 
     Args:
         timeline_contains (dict): state in the timeline
         timeline_start (dict): state at the start of the timeline
+        timeline_start_members (dict): state at the start of the timeline
+            for room members who participate in this chunk of timeline.
+            Should always be a subset of timeline_start.
         previous (dict): state at the end of the previous sync (or empty dict
             if this is an initial sync)
         current (dict): state at the end of the timeline
@@ -1464,11 +1491,12 @@ def _calculate_state(timeline_contains, timeline_start, previous, current):
     }
 
     c_ids = set(e for e in current.values())
-    tc_ids = set(e for e in timeline_contains.values())
-    p_ids = set(e for e in previous.values())
     ts_ids = set(e for e in timeline_start.values())
+    tsm_ids = set(e for e in timeline_start_members.values())
+    p_ids = set(e for e in previous.values())
+    tc_ids = set(e for e in timeline_contains.values())
 
-    state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
+    state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids
 
     return {
         event_id_to_key[e]: e for e in state_ids
-- 
cgit 1.4.1


From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 13 Mar 2018 22:31:41 +0000
Subject: make it work

---
 synapse/handlers/sync.py |  6 +++---
 synapse/storage/state.py | 10 ++++++----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index c754cfdeeb..c05e3d107f 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -498,7 +498,7 @@ class SyncHandler(object):
                     if filter_members:
                         member_state_ids = {
                             t: state_ids[t]
-                            for t in state_ids if t[0] == EventTypes.member
+                            for t in state_ids if t[0] == EventTypes.Member
                         }
 
                 else:
@@ -511,7 +511,7 @@ class SyncHandler(object):
                     if filter_members:
                         member_state_ids = {
                             t: state_ids[t]
-                            for t in state_ids if t[0] == EventTypes.member
+                            for t in state_ids if t[0] == EventTypes.Member
                         }
 
                 timeline_state = {
@@ -542,7 +542,7 @@ class SyncHandler(object):
                 if filter_members:
                     member_state_ids = {
                         t: state_at_timeline_start[t]
-                        for t in state_ids if t[0] == EventTypes.member
+                        for t in state_ids if t[0] == EventTypes.Member
                     }
 
                 timeline_state = {
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 4291cde7ab..9c9994c073 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore):
                 if include_other_types:
                     # XXX: check whether this slows postgres down like a list of
                     # ORs does too?
+                    unique_types = set([ t for (t, _) in types ])
                     clause_to_args.append(
                         (
-                            "AND type <> ? " * len(types),
-                            [t for (t, _) in types]
+                            "AND type <> ? " * len(unique_types),
+                            list(unique_types)
                         )
                     )
             else:
@@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore):
                         where_args.extend([typ[0], typ[1]])
 
                 if include_other_types:
+                    unique_types = set([ t for (t, _) in types ])                    
                     where_clauses.append(
-                        "(" + " AND ".join(["type <> ?"] * len(types)) + ")"
+                        "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")"
                     )
-                    where_args.extend(t for (t, _) in types)
+                    where_args.extend(list(unique_types))
 
                 where_clause = "AND (%s)" % (" OR ".join(where_clauses))
             else:
-- 
cgit 1.4.1


From c9d72e4571752554dfe01d755ae23f55c5f84ade Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 13 Mar 2018 23:46:45 +0000
Subject: oops

---
 synapse/handlers/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index c05e3d107f..887624c431 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -542,7 +542,7 @@ class SyncHandler(object):
                 if filter_members:
                     member_state_ids = {
                         t: state_at_timeline_start[t]
-                        for t in state_ids if t[0] == EventTypes.Member
+                        for t in state_at_timeline_start if t[0] == EventTypes.Member
                     }
 
                 timeline_state = {
-- 
cgit 1.4.1


From 4d0cfef6ee023bfe83113a0378321830ebde1619 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Wed, 14 Mar 2018 00:02:20 +0000
Subject: add copyright to nudge CI

---
 synapse/handlers/sync.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 887624c431..edbd2ae771 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-# Copyright 2015 - 2016 OpenMarket Ltd
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
-- 
cgit 1.4.1


From 3bc5bd2d22e6b53ec1f89760301df1517e71b53a Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 16 Mar 2018 00:52:04 +0000
Subject: make incr syncs work

---
 synapse/handlers/sync.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index edbd2ae771..84c894ca4a 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -499,7 +499,7 @@ class SyncHandler(object):
                     if filter_members:
                         member_state_ids = {
                             t: state_ids[t]
-                            for t in state_ids if t[0] == EventTypes.Member
+                            for t in state_ids if state_ids[t][0] == EventTypes.Member
                         }
 
                 else:
@@ -512,7 +512,7 @@ class SyncHandler(object):
                     if filter_members:
                         member_state_ids = {
                             t: state_ids[t]
-                            for t in state_ids if t[0] == EventTypes.Member
+                            for t in state_ids if state_ids[t][0] == EventTypes.Member
                         }
 
                 timeline_state = {
@@ -543,7 +543,8 @@ class SyncHandler(object):
                 if filter_members:
                     member_state_ids = {
                         t: state_at_timeline_start[t]
-                        for t in state_at_timeline_start if t[0] == EventTypes.Member
+                        for t in state_at_timeline_start
+                        if state_at_timeline_start[t][0] == EventTypes.Member
                     }
 
                 timeline_state = {
-- 
cgit 1.4.1


From 5b3b3aada8952b53f82723227c9758ed47450a2e Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 16 Mar 2018 01:17:34 +0000
Subject: simplify timeline_start_members

---
 synapse/handlers/sync.py | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 84c894ca4a..ffb4f7915e 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -496,12 +496,6 @@ class SyncHandler(object):
                         batch.events[0].event_id, types=types
                     )
 
-                    if filter_members:
-                        member_state_ids = {
-                            t: state_ids[t]
-                            for t in state_ids if state_ids[t][0] == EventTypes.Member
-                        }
-
                 else:
                     current_state_ids = yield self.get_state_at(
                         room_id, stream_position=now_token, types=types
@@ -509,11 +503,13 @@ class SyncHandler(object):
 
                     state_ids = current_state_ids
 
-                    if filter_members:
-                        member_state_ids = {
-                            t: state_ids[t]
-                            for t in state_ids if state_ids[t][0] == EventTypes.Member
-                        }
+                if filter_members:
+                    logger.info("Finding members from %r", state_ids)
+                    member_state_ids = {
+                        e: state_ids[e]
+                        for e in state_ids if state_ids[e][0] == EventTypes.Member
+                    }
+                    logger.info("Found members %r", member_state_ids)
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
@@ -541,11 +537,14 @@ class SyncHandler(object):
                 )
 
                 if filter_members:
+                    logger.info("Finding members from %r", state_at_timeline_start)
                     member_state_ids = {
-                        t: state_at_timeline_start[t]
-                        for t in state_at_timeline_start
-                        if state_at_timeline_start[t][0] == EventTypes.Member
+                        e: state_at_timeline_start[e]
+                        for e in state_at_timeline_start
+                        if state_at_timeline_start[e][0] == EventTypes.Member
                     }
+                    logger.info("Found members %r", member_state_ids)
+
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
-- 
cgit 1.4.1


From f7dcc404f216383bfd62e4611c6a28c3f13576dc Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 16 Mar 2018 01:37:53 +0000
Subject: add state_ids for timeline entries

---
 synapse/handlers/sync.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index ffb4f7915e..9b7e598e74 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -474,6 +474,7 @@ class SyncHandler(object):
             if filter_members:
                 # We only request state for the members needed to display the
                 # timeline:
+
                 types = [
                     (EventTypes.Member, state_key)
                     for state_key in set(
@@ -481,11 +482,14 @@ class SyncHandler(object):
                         for event in batch.events
                     )
                 ]
-                types.append((None, None))  # don't just filter to room members
 
-                # TODO: we should opportunistically deduplicate these members too
+                # TODO: we should opportunistically deduplicate these members here
                 # within the same sync series (based on an in-memory cache)
 
+                if not types:
+                    filter_members = False
+                types.append((None, None))  # don't just filter to room members
+
             if full_state:
                 if batch:
                     current_state_ids = yield self.store.get_state_ids_for_event(
@@ -545,7 +549,6 @@ class SyncHandler(object):
                     }
                     logger.info("Found members %r", member_state_ids)
 
-
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
                     for event in batch.events if event.is_state()
@@ -559,7 +562,14 @@ class SyncHandler(object):
                     current=current_state_ids,
                 )
             else:
-                state_ids = {}
+                if filter_members:
+                    # strip off the (None, None) and filter to just room members
+                    types = types[:-1]
+                    state_ids = yield self.store.get_state_ids_for_event(
+                        batch.events[0].event_id, types=types
+                    )
+                else:
+                    state_ids = {}
 
         state = {}
         if state_ids:
-- 
cgit 1.4.1


From 4f0493c850d4611e8ada42c1de54a18e8dc15a37 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 16 Mar 2018 01:43:37 +0000
Subject: fix tsm search again

---
 synapse/handlers/sync.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 9b7e598e74..4bf85a128f 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -510,8 +510,8 @@ class SyncHandler(object):
                 if filter_members:
                     logger.info("Finding members from %r", state_ids)
                     member_state_ids = {
-                        e: state_ids[e]
-                        for e in state_ids if state_ids[e][0] == EventTypes.Member
+                        t: state_ids[t]
+                        for t in state_ids if t[0] == EventTypes.Member
                     }
                     logger.info("Found members %r", member_state_ids)
 
@@ -543,9 +543,8 @@ class SyncHandler(object):
                 if filter_members:
                     logger.info("Finding members from %r", state_at_timeline_start)
                     member_state_ids = {
-                        e: state_at_timeline_start[e]
-                        for e in state_at_timeline_start
-                        if state_at_timeline_start[e][0] == EventTypes.Member
+                        t: state_at_timeline_start[t]
+                        for t in state_at_timeline_start if t[0] == EventTypes.Member
                     }
                     logger.info("Found members %r", member_state_ids)
 
-- 
cgit 1.4.1


From fc5397fdf5acefd33bd3b808b6d8cc7c31b69b55 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 16 Mar 2018 01:44:55 +0000
Subject: remove debug

---
 synapse/handlers/sync.py | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4bf85a128f..b7f42bd594 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -508,12 +508,10 @@ class SyncHandler(object):
                     state_ids = current_state_ids
 
                 if filter_members:
-                    logger.info("Finding members from %r", state_ids)
                     member_state_ids = {
                         t: state_ids[t]
                         for t in state_ids if t[0] == EventTypes.Member
                     }
-                    logger.info("Found members %r", member_state_ids)
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
@@ -541,12 +539,10 @@ class SyncHandler(object):
                 )
 
                 if filter_members:
-                    logger.info("Finding members from %r", state_at_timeline_start)
                     member_state_ids = {
                         t: state_at_timeline_start[t]
                         for t in state_at_timeline_start if t[0] == EventTypes.Member
                     }
-                    logger.info("Found members %r", member_state_ids)
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
-- 
cgit 1.4.1


From 366f730bf697fe8fbb18a509ec1852987bc80410 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Sun, 18 Mar 2018 21:40:35 +0000
Subject: only get member state IDs for incremental syncs if we're filtering

---
 synapse/handlers/sync.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index b7f42bd594..6b57afd97b 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -557,14 +557,14 @@ class SyncHandler(object):
                     current=current_state_ids,
                 )
             else:
+                state_ids = {}
                 if filter_members:
                     # strip off the (None, None) and filter to just room members
                     types = types[:-1]
-                    state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[0].event_id, types=types
-                    )
-                else:
-                    state_ids = {}
+                    if types:
+                        state_ids = yield self.store.get_state_ids_for_event(
+                            batch.events[0].event_id, types=types
+                        )
 
         state = {}
         if state_ids:
-- 
cgit 1.4.1


From 478af0f72005708dbbed23e30c547c3d66c07c0e Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Mon, 19 Mar 2018 01:00:12 +0000
Subject: reshuffle todo & comments

---
 synapse/handlers/sync.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 6b57afd97b..76f5057377 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -483,11 +483,15 @@ class SyncHandler(object):
                     )
                 ]
 
-                # TODO: we should opportunistically deduplicate these members here
-                # within the same sync series (based on an in-memory cache)
+                # We can't remove redundant member types at this stage as it has
+                # to be done based on event_id, and we don't have the member
+                # event ids until we've pulled them out of the DB.
 
                 if not types:
+                    # an optimisation to stop needlessly trying to calculate
+                    # member_state_ids
                     filter_members = False
+
                 types.append((None, None))  # don't just filter to room members
 
             if full_state:
@@ -559,6 +563,10 @@ class SyncHandler(object):
             else:
                 state_ids = {}
                 if filter_members:
+                    # TODO: filter out redundant members based on their mxids (not their
+                    # event_ids) at this point. We know we can do it based on mxid as this
+                    # is an non-gappy incremental sync.
+
                     # strip off the (None, None) and filter to just room members
                     types = types[:-1]
                     if types:
-- 
cgit 1.4.1


From b2f22829475ccfe19e994aedddb8d04995018bf4 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Mon, 19 Mar 2018 01:15:13 +0000
Subject: make lazy_load_members configurable in filters

---
 synapse/api/filtering.py |  6 ++++++
 synapse/handlers/sync.py | 18 +++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 83206348e5..339e4a31d6 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -260,6 +260,9 @@ class FilterCollection(object):
     def ephemeral_limit(self):
         return self._room_ephemeral_filter.limit()
 
+    def lazy_load_members(self):
+        return self._room_state_filter.lazy_load_members()
+
     def filter_presence(self, events):
         return self._presence_filter.filter(events)
 
@@ -416,6 +419,9 @@ class Filter(object):
     def limit(self):
         return self.filter_json.get("limit", 10)
 
+    def lazy_load_members(self):
+        return self.filter_json.get("lazy_load_members", False)
+
 
 def _matches_wildcard(actual_value, filter_value):
     if filter_value.endswith("*"):
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 76f5057377..f521d22e91 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -442,7 +442,7 @@ class SyncHandler(object):
 
     @defer.inlineCallbacks
     def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token,
-                            full_state, filter_members):
+                            full_state):
         """ Works out the differnce in state between the start of the timeline
         and the previous sync.
 
@@ -455,7 +455,7 @@ class SyncHandler(object):
                 be None.
             now_token(str): Token of the end of the current batch.
             full_state(bool): Whether to force returning the full state.
-            filter_members(bool): Whether to only return state for members
+            lazy_load_members(bool): Whether to only return state for members
                 referenced in this timeline segment
 
         Returns:
@@ -470,8 +470,9 @@ class SyncHandler(object):
 
             types = None
             member_state_ids = {}
+            lazy_load_members = sync_config.filter_collection.lazy_load_members()
 
-            if filter_members:
+            if lazy_load_members:
                 # We only request state for the members needed to display the
                 # timeline:
 
@@ -490,7 +491,7 @@ class SyncHandler(object):
                 if not types:
                     # an optimisation to stop needlessly trying to calculate
                     # member_state_ids
-                    filter_members = False
+                    lazy_load_members = False
 
                 types.append((None, None))  # don't just filter to room members
 
@@ -511,7 +512,7 @@ class SyncHandler(object):
 
                     state_ids = current_state_ids
 
-                if filter_members:
+                if lazy_load_members:
                     member_state_ids = {
                         t: state_ids[t]
                         for t in state_ids if t[0] == EventTypes.Member
@@ -542,7 +543,7 @@ class SyncHandler(object):
                     batch.events[0].event_id, types=types
                 )
 
-                if filter_members:
+                if lazy_load_members:
                     member_state_ids = {
                         t: state_at_timeline_start[t]
                         for t in state_at_timeline_start if t[0] == EventTypes.Member
@@ -562,7 +563,7 @@ class SyncHandler(object):
                 )
             else:
                 state_ids = {}
-                if filter_members:
+                if lazy_load_members:
                     # TODO: filter out redundant members based on their mxids (not their
                     # event_ids) at this point. We know we can do it based on mxid as this
                     # is an non-gappy incremental sync.
@@ -1380,8 +1381,7 @@ class SyncHandler(object):
             return
 
         state = yield self.compute_state_delta(
-            room_id, batch, sync_config, since_token, now_token,
-            full_state=full_state, filter_members=True
+            room_id, batch, sync_config, since_token, now_token, full_state=full_state
         )
 
         if room_builder.rtype == "joined":
-- 
cgit 1.4.1


From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 29 May 2018 01:09:55 +0100
Subject: add pydoc

---
 synapse/handlers/sync.py | 18 ++++++++----
 synapse/storage/state.py | 76 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 27 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 05bf6d46dd..8e38078332 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -423,7 +423,11 @@ class SyncHandler(object):
 
         Args:
             event(synapse.events.EventBase): event of interest
-
+            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
         Returns:
             A Deferred map from ((type, state_key)->Event)
         """
@@ -440,6 +444,11 @@ class SyncHandler(object):
         Args:
             room_id(str): room for which to get state
             stream_position(StreamToken): point at which to get state
+            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
 
         Returns:
             A Deferred map from ((type, state_key)->Event)
@@ -472,8 +481,6 @@ class SyncHandler(object):
                 be None.
             now_token(str): Token of the end of the current batch.
             full_state(bool): Whether to force returning the full state.
-            lazy_load_members(bool): Whether to only return state for members
-                referenced in this timeline segment
 
         Returns:
              A deferred new event dictionary
@@ -496,7 +503,7 @@ class SyncHandler(object):
                 types = [
                     (EventTypes.Member, state_key)
                     for state_key in set(
-                        event.sender  # FIXME: we also care about targets etc.
+                        event.sender  # FIXME: we also care about invite targets etc.
                         for event in batch.events
                     )
                 ]
@@ -1398,7 +1405,8 @@ class SyncHandler(object):
             return
 
         state = yield self.compute_state_delta(
-            room_id, batch, sync_config, since_token, now_token, full_state=full_state
+            room_id, batch, sync_config, since_token, now_token,
+            full_state=full_state
         )
 
         if room_builder.rtype == "joined":
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 55159e64d0..63b6834202 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore):
 
     @defer.inlineCallbacks
     def _get_state_groups_from_groups(self, groups, types):
-        """Returns dictionary state_group -> (dict of (type, state_key) -> event id)
+        """Returns the state groups for a given set of groups, filtering on
+        types of state events.
+
+        Args:
+            groups(list[int]): list of state group IDs to query
+            types(list[str|None, str|None])|None: List of 2-tuples of the form
+                (`type`, `state_key`), where a `state_key` of `None` matches all
+                state_keys for the `type`. Presence of type of `None` indicates
+                that types not in the list should not be filtered out. If None,
+                all types are returned.
+
+        Returns:
+            dictionary state_group -> (dict of (type, state_key) -> event id)
         """
         results = {}
 
@@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore):
         if types is not None:
             type_set = set(types)
             if (None, None) in type_set:
+                # special case (None, None) to mean that other types should be
+                # returned - i.e. we were just filtering down the state keys
+                # for particular types.
                 include_other_types = True
                 type_set.remove((None, None))
             types = list(type_set)  # deduplicate types list
@@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore):
         that are in the `types` list.
 
         Args:
-            event_ids (list)
-            types (list): List of (type, state_key) tuples which are used to
-                filter the state fetched. `state_key` may be None, which matches
-                any `state_key`
+            event_ids (list[string])
+            types (list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
 
         Returns:
             deferred: A list of dicts corresponding to the event_ids given.
@@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         Args:
             event_ids(list(str)): events whose state should be returned
-            types(list[(str, str)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. May be None, which
-                matches any key
+            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
 
         Returns:
             A deferred dict from event_id -> (type, state_key) -> state_event
@@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str, str)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. May be None, which
-                matches any key
+            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
@@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str, str)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. May be None, which
-                matches any key
+            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+                which are used to filter the state fetched. If `state_key` is None,
+                all events are returned of the given type.  Presence of type of `None`
+                indicates that types not in the list should not be filtered out.
+                May be None, which matches any key.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
@@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore):
         missing state.
 
         Args:
-            group: The state group to lookup
-            types (list): List of 2-tuples of the form (`type`, `state_key`),
-                where a `state_key` of `None` matches all state_keys for the
-                `type`. Presence of type of `None` indicates that types not
-                in the list should not be filtered out.
+            group(int): The state group to lookup
+            types(list[str|None, str|None]): List of 2-tuples of the form
+                (`type`, `state_key`), where a `state_key` of `None` matches all
+                state_keys for the `type`. Presence of type of `None` indicates
+                that types not in the list should not be filtered out.
         """
         is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
 
@@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore):
     @defer.inlineCallbacks
     def _get_state_for_groups(self, groups, types=None):
         """Given list of groups returns dict of group -> list of state events
-        with matching types. `types` is a list of `(type, state_key)`, where
-        a `state_key` of None matches all state_keys. If `types` is None then
-        all events are returned.
+        with matching types.
+
+        Args:
+            groups(list[int]): list of groups whose state to query
+            types(list[str|None, str|None]|None): List of 2-tuples of the form
+                (`type`, `state_key`), where a `state_key` of `None` matches all
+                state_keys for the `type`. Presence of type of `None` indicates
+                that types not in the list should not be filtered out. If None,
+                all events are returned.
+
+        Returns:
+            dict of group -> list of state events
         """
         if types:
             types = frozenset(types)
-- 
cgit 1.4.1


From 5f6122fe102f994e023d530cb6076730f31f619f Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Mon, 4 Jun 2018 00:08:52 +0300
Subject: more comments

---
 synapse/handlers/sync.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 8e38078332..7ab97b24a6 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -515,6 +515,9 @@ class SyncHandler(object):
                 if not types:
                     # an optimisation to stop needlessly trying to calculate
                     # member_state_ids
+                    #
+                    # XXX: i can't remember what this trying to do. why would
+                    # types ever be []? --matthew
                     lazy_load_members = False
 
                 types.append((None, None))  # don't just filter to room members
@@ -568,6 +571,10 @@ class SyncHandler(object):
                 )
 
                 if lazy_load_members:
+                    # TODO: filter out redundant members based on their event_ids
+                    # (not mxids) at this point. In practice, limited syncs are
+                    # relatively rare so it's not a total disaster to send redundant
+                    # members down at this point.
                     member_state_ids = {
                         t: state_at_timeline_start[t]
                         for t in state_at_timeline_start if t[0] == EventTypes.Member
-- 
cgit 1.4.1


From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Thu, 19 Jul 2018 18:32:02 +0100
Subject: add a filtered_types param to limit filtering to specific types

---
 synapse/handlers/sync.py |  65 +++++++++++++++------------
 synapse/storage/state.py | 113 +++++++++++++++++++++++++----------------------
 2 files changed, 96 insertions(+), 82 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0c21ac2c77..cb711b8758 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -417,38 +417,44 @@ class SyncHandler(object):
         ))
 
     @defer.inlineCallbacks
-    def get_state_after_event(self, event, types=None):
+    def get_state_after_event(self, event, types=None, filtered_types=None):
         """
         Get the room state after the given event
 
         Args:
             event(synapse.events.EventBase): event of interest
-            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
+                all events are returned of the given type.
                 May be None, which matches any key.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
+
         Returns:
             A Deferred map from ((type, state_key)->Event)
         """
-        state_ids = yield self.store.get_state_ids_for_event(event.event_id, types)
+        state_ids = yield self.store.get_state_ids_for_event(
+            event.event_id, types, filtered_types=filtered_types
+        )
         if event.is_state():
             state_ids = state_ids.copy()
             state_ids[(event.type, event.state_key)] = event.event_id
         defer.returnValue(state_ids)
 
     @defer.inlineCallbacks
-    def get_state_at(self, room_id, stream_position, types=None):
+    def get_state_at(self, room_id, stream_position, types=None, filtered_types=None):
         """ Get the room state at a particular stream position
 
         Args:
             room_id(str): room for which to get state
             stream_position(StreamToken): point at which to get state
-            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
-                May be None, which matches any key.
+                all events are returned of the given type.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             A Deferred map from ((type, state_key)->Event)
@@ -463,7 +469,9 @@ class SyncHandler(object):
 
         if last_events:
             last_event = last_events[-1]
-            state = yield self.get_state_after_event(last_event, types)
+            state = yield self.get_state_after_event(
+                last_event, types, filtered_types=filtered_types
+            )
 
         else:
             # no events in this room - so presumably no state
@@ -499,6 +507,7 @@ class SyncHandler(object):
             types = None
             member_state_ids = {}
             lazy_load_members = sync_config.filter_collection.lazy_load_members()
+            filtered_types = None
 
             if lazy_load_members:
                 # We only request state for the members needed to display the
@@ -516,29 +525,25 @@ class SyncHandler(object):
                 # to be done based on event_id, and we don't have the member
                 # event ids until we've pulled them out of the DB.
 
-                if not types:
-                    # an optimisation to stop needlessly trying to calculate
-                    # member_state_ids
-                    #
-                    # XXX: i can't remember what this trying to do. why would
-                    # types ever be []? --matthew
-                    lazy_load_members = False
-
-                types.append((None, None))  # don't just filter to room members
+                # only apply the filtering to room members
+                filtered_types = [EventTypes.Member]
 
             if full_state:
                 if batch:
                     current_state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[-1].event_id, types=types
+                        batch.events[-1].event_id, types=types,
+                        filtered_types=filtered_types
                     )
 
                     state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[0].event_id, types=types
+                        batch.events[0].event_id, types=types,
+                        filtered_types=filtered_types
                     )
 
                 else:
                     current_state_ids = yield self.get_state_at(
-                        room_id, stream_position=now_token, types=types
+                        room_id, stream_position=now_token, types=types,
+                        filtered_types=filtered_types
                     )
 
                     state_ids = current_state_ids
@@ -563,15 +568,18 @@ class SyncHandler(object):
                 )
             elif batch.limited:
                 state_at_previous_sync = yield self.get_state_at(
-                    room_id, stream_position=since_token, types=types
+                    room_id, stream_position=since_token, types=types,
+                    filtered_types=filtered_types
                 )
 
                 current_state_ids = yield self.store.get_state_ids_for_event(
-                    batch.events[-1].event_id, types=types
+                    batch.events[-1].event_id, types=types,
+                    filtered_types=filtered_types
                 )
 
                 state_at_timeline_start = yield self.store.get_state_ids_for_event(
-                    batch.events[0].event_id, types=types
+                    batch.events[0].event_id, types=types,
+                    filtered_types=filtered_types
                 )
 
                 if lazy_load_members:
@@ -603,11 +611,10 @@ class SyncHandler(object):
                     # event_ids) at this point. We know we can do it based on mxid as this
                     # is an non-gappy incremental sync.
 
-                    # strip off the (None, None) and filter to just room members
-                    types = types[:-1]
                     if types:
                         state_ids = yield self.store.get_state_ids_for_event(
-                            batch.events[0].event_id, types=types
+                            batch.events[0].event_id, types=types,
+                            filtered_types=filtered_types
                         )
 
         state = {}
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index c5ff44fef7..ee531a2ce0 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         })
 
     @defer.inlineCallbacks
-    def _get_state_groups_from_groups(self, groups, types):
+    def _get_state_groups_from_groups(self, groups, types, filtered_types=None):
         """Returns the state groups for a given set of groups, filtering on
         types of state events.
 
@@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore):
             groups(list[int]): list of state group IDs to query
             types(list[str|None, str|None])|None: List of 2-tuples of the form
                 (`type`, `state_key`), where a `state_key` of `None` matches all
-                state_keys for the `type`. Presence of type of `None` indicates
-                that types not in the list should not be filtered out. If None,
-                all types are returned.
+                state_keys for the `type`. If None, all types are returned.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             dictionary state_group -> (dict of (type, state_key) -> event id)
@@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore):
         for chunk in chunks:
             res = yield self.runInteraction(
                 "_get_state_groups_from_groups",
-                self._get_state_groups_from_groups_txn, chunk, types,
+                self._get_state_groups_from_groups_txn, chunk, types, filtered_types
             )
             results.update(res)
 
         defer.returnValue(results)
 
-    def _get_state_groups_from_groups_txn(self, txn, groups, types=None):
+    def _get_state_groups_from_groups_txn(
+        self, txn, groups, types=None, filtered_types=None
+    ):
         results = {group: {} for group in groups}
 
-        include_other_types = False
+        include_other_types = False if filtered_types is None else True
 
         if types is not None:
-            type_set = set(types)
-            if (None, None) in type_set:
-                # special case (None, None) to mean that other types should be
-                # returned - i.e. we were just filtering down the state keys
-                # for particular types.
-                include_other_types = True
-                type_set.remove((None, None))
-            types = list(type_set)  # deduplicate types list
+            types = list(set(types))  # deduplicate types list
 
         if isinstance(self.database_engine, PostgresEngine):
             # Temporarily disable sequential scans in this transaction. This is
@@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore):
                 if include_other_types:
                     # XXX: check whether this slows postgres down like a list of
                     # ORs does too?
-                    unique_types = set([t for (t, _) in types])
+                    unique_types = set(filtered_types)
                     clause_to_args.append(
                         (
                             "AND type <> ? " * len(unique_types),
@@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore):
                         where_args.extend([typ[0], typ[1]])
 
                 if include_other_types:
-                    unique_types = set([t for (t, _) in types])
+                    unique_types = set(filtered_types)
                     where_clauses.append(
                         "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")"
                     )
@@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore):
         return results
 
     @defer.inlineCallbacks
-    def get_state_for_events(self, event_ids, types):
+    def get_state_for_events(self, event_ids, types, filtered_types):
         """Given a list of event_ids and type tuples, return a list of state
         dicts for each event. The state dicts will only have the type/state_keys
         that are in the `types` list.
 
         Args:
             event_ids (list[string])
-            types (list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types (list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
+                all events are returned of the given type.
                 May be None, which matches any key.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             deferred: A list of dicts corresponding to the event_ids given.
@@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         )
 
         groups = set(itervalues(event_to_groups))
-        group_to_state = yield self._get_state_for_groups(groups, types)
+        group_to_state = yield self._get_state_for_groups(groups, types, filtered_types)
 
         state_event_map = yield self.get_events(
             [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)],
@@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore):
         defer.returnValue({event: event_to_state[event] for event in event_ids})
 
     @defer.inlineCallbacks
-    def get_state_ids_for_events(self, event_ids, types=None):
+    def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None):
         """
         Get the state dicts corresponding to a list of events
 
         Args:
             event_ids(list(str)): events whose state should be returned
-            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
+                all events are returned of the given type.
                 May be None, which matches any key.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             A deferred dict from event_id -> (type, state_key) -> state_event
@@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         )
 
         groups = set(itervalues(event_to_groups))
-        group_to_state = yield self._get_state_for_groups(groups, types)
+        group_to_state = yield self._get_state_for_groups(groups, types, filtered_types)
 
         event_to_state = {
             event_id: group_to_state[group]
@@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore):
         defer.returnValue({event: event_to_state[event] for event in event_ids})
 
     @defer.inlineCallbacks
-    def get_state_for_event(self, event_id, types=None):
+    def get_state_for_event(self, event_id, types=None, filtered_types=None):
         """
         Get the state dict corresponding to a particular event
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
+                all events are returned of the given type.
                 May be None, which matches any key.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
         """
-        state_map = yield self.get_state_for_events([event_id], types)
+        state_map = yield self.get_state_for_events([event_id], types, filtered_types)
         defer.returnValue(state_map[event_id])
 
     @defer.inlineCallbacks
-    def get_state_ids_for_event(self, event_id, types=None):
+    def get_state_ids_for_event(self, event_id, types=None, filtered_types=None):
         """
         Get the state dict corresponding to a particular event
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str|None, str|None)]|None): List of (type, state_key) tuples
+            types(list[(str, str|None)]|None): List of (type, state_key) tuples
                 which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.  Presence of type of `None`
-                indicates that types not in the list should not be filtered out.
+                all events are returned of the given type.
                 May be None, which matches any key.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
         """
-        state_map = yield self.get_state_ids_for_events([event_id], types)
+        state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types)
         defer.returnValue(state_map[event_id])
 
     @cached(max_entries=50000)
@@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         defer.returnValue({row["event_id"]: row["state_group"] for row in rows})
 
-    def _get_some_state_from_cache(self, group, types):
+    def _get_some_state_from_cache(self, group, types, filtered_types=None):
         """Checks if group is in cache. See `_get_state_for_groups`
 
         Returns 3-tuple (`state_dict`, `missing_types`, `got_all`).
@@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         Args:
             group(int): The state group to lookup
-            types(list[str|None, str|None]): List of 2-tuples of the form
+            types(list[str, str|None]): List of 2-tuples of the form
                 (`type`, `state_key`), where a `state_key` of `None` matches all
-                state_keys for the `type`. Presence of type of `None` indicates
-                that types not in the list should not be filtered out.
+                state_keys for the `type`.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
         """
         is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
 
         type_to_key = {}
+
+        # tracks which of the requested types are missing from our cache
         missing_types = set()
 
-        include_other_types = False
+        include_other_types = True if filtered_types is None else False
 
         for typ, state_key in types:
             key = (typ, state_key)
 
-            if typ is None:
-                include_other_types = True
-                next
-
             if state_key is None:
                 type_to_key[typ] = None
                 # XXX: why do we mark the type as missing from our cache just
                 # because we weren't filtering on a specific value of state_key?
+                # is it because the cache doesn't handle wildcards?
                 missing_types.add(key)
             else:
                 if type_to_key.get(typ, object()) is not None:
@@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         def include(typ, state_key):
             valid_state_keys = type_to_key.get(typ, sentinel)
             if valid_state_keys is sentinel:
-                return include_other_types
+                return include_other_types and typ not in filtered_types
             if valid_state_keys is None:
                 return True
             if state_key in valid_state_keys:
@@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore):
         return state_dict_ids, is_all
 
     @defer.inlineCallbacks
-    def _get_state_for_groups(self, groups, types=None):
+    def _get_state_for_groups(self, groups, types=None, filtered_types=None):
         """Gets the state at each of a list of state groups, optionally
         filtering by type/state_key
 
         Args:
             groups (iterable[int]): list of state groups for which we want
                 to get the state.
-            types (None|iterable[(None|str, None|str)]):
+            types (None|iterable[(None, None|str)]):
                 indicates the state type/keys required. If None, the whole
                 state is fetched and returned.
 
                 Otherwise, each entry should be a `(type, state_key)` tuple to
                 include in the response. A `state_key` of None is a wildcard
-                meaning that we require all state with that type. A `type` of None
-                indicates that types not in the list should not be filtered out.
+                meaning that we require all state with that type.
+            filtered_types(list[str]|None): Only apply filtering via `types` to this
+                list of event types.  Other types of events are returned unfiltered.
+                If None, `types` filtering is applied to all events.
 
         Returns:
             Deferred[dict[int, dict[(type, state_key), EventBase]]]
@@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         if types is not None:
             for group in set(groups):
                 state_dict_ids, _, got_all = self._get_some_state_from_cache(
-                    group, types,
+                    group, types, filtered_types
                 )
                 results[group] = state_dict_ids
 
@@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore):
                 types_to_fetch = types
 
             group_to_state_dict = yield self._get_state_groups_from_groups(
-                missing_groups, types_to_fetch,
+                missing_groups, types_to_fetch, filtered_types
             )
 
             for group, group_state_dict in iteritems(group_to_state_dict):
-- 
cgit 1.4.1


From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Thu, 19 Jul 2018 19:03:50 +0100
Subject: incorporate review

---
 synapse/handlers/sync.py | 44 +++++++++++++++++++++++++++-----------------
 synapse/storage/state.py |  7 ++++---
 2 files changed, 31 insertions(+), 20 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index cb711b8758..b597f94cf6 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -435,7 +435,7 @@ class SyncHandler(object):
             A Deferred map from ((type, state_key)->Event)
         """
         state_ids = yield self.store.get_state_ids_for_event(
-            event.event_id, types, filtered_types=filtered_types
+            event.event_id, types, filtered_types=filtered_types,
         )
         if event.is_state():
             state_ids = state_ids.copy()
@@ -470,7 +470,7 @@ class SyncHandler(object):
         if last_events:
             last_event = last_events[-1]
             state = yield self.get_state_after_event(
-                last_event, types, filtered_types=filtered_types
+                last_event, types, filtered_types=filtered_types,
             )
 
         else:
@@ -505,7 +505,6 @@ class SyncHandler(object):
         with Measure(self.clock, "compute_state_delta"):
 
             types = None
-            member_state_ids = {}
             lazy_load_members = sync_config.filter_collection.lazy_load_members()
             filtered_types = None
 
@@ -521,10 +520,6 @@ class SyncHandler(object):
                     )
                 ]
 
-                # We can't remove redundant member types at this stage as it has
-                # to be done based on event_id, and we don't have the member
-                # event ids until we've pulled them out of the DB.
-
                 # only apply the filtering to room members
                 filtered_types = [EventTypes.Member]
 
@@ -532,27 +527,32 @@ class SyncHandler(object):
                 if batch:
                     current_state_ids = yield self.store.get_state_ids_for_event(
                         batch.events[-1].event_id, types=types,
-                        filtered_types=filtered_types
+                        filtered_types=filtered_types,
                     )
 
                     state_ids = yield self.store.get_state_ids_for_event(
                         batch.events[0].event_id, types=types,
-                        filtered_types=filtered_types
+                        filtered_types=filtered_types,
                     )
 
                 else:
                     current_state_ids = yield self.get_state_at(
                         room_id, stream_position=now_token, types=types,
-                        filtered_types=filtered_types
+                        filtered_types=filtered_types,
                     )
 
                     state_ids = current_state_ids
 
+                # track the membership state events as of the beginning of this
+                # timeline sequence, so they can be filtered out of the state
+                # if we are lazy loading members.
                 if lazy_load_members:
                     member_state_ids = {
                         t: state_ids[t]
                         for t in state_ids if t[0] == EventTypes.Member
                     }
+                else:
+                    member_state_ids = {}
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
@@ -569,28 +569,38 @@ class SyncHandler(object):
             elif batch.limited:
                 state_at_previous_sync = yield self.get_state_at(
                     room_id, stream_position=since_token, types=types,
-                    filtered_types=filtered_types
+                    filtered_types=filtered_types,
                 )
 
                 current_state_ids = yield self.store.get_state_ids_for_event(
                     batch.events[-1].event_id, types=types,
-                    filtered_types=filtered_types
+                    filtered_types=filtered_types,
                 )
 
                 state_at_timeline_start = yield self.store.get_state_ids_for_event(
                     batch.events[0].event_id, types=types,
-                    filtered_types=filtered_types
+                    filtered_types=filtered_types,
                 )
 
+                # track the membership state events as of the beginning of this
+                # timeline sequence, so they can be filtered out of the state
+                # if we are lazy loading members.
                 if lazy_load_members:
-                    # TODO: filter out redundant members based on their event_ids
-                    # (not mxids) at this point. In practice, limited syncs are
+                    # TODO: optionally filter out redundant membership events at this
+                    # point, to stop repeatedly sending members in every /sync as if
+                    # the client isn't tracking them.
+                    # When implement, this should filter using event_ids (not mxids).
+                    # In practice, limited syncs are
                     # relatively rare so it's not a total disaster to send redundant
-                    # members down at this point.
+                    # members down at this point. Redundant members are ones which
+                    # repeatedly get sent down /sync because we don't know if the client
+                    # is caching them or not.
                     member_state_ids = {
                         t: state_at_timeline_start[t]
                         for t in state_at_timeline_start if t[0] == EventTypes.Member
                     }
+                else:
+                    member_state_ids = {}
 
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
@@ -614,7 +624,7 @@ class SyncHandler(object):
                     if types:
                         state_ids = yield self.store.get_state_ids_for_event(
                             batch.events[0].event_id, types=types,
-                            filtered_types=filtered_types
+                            filtered_types=filtered_types,
                         )
 
         state = {}
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index ee531a2ce0..75c6366e7a 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore):
 
             if state_key is None:
                 type_to_key[typ] = None
-                # XXX: why do we mark the type as missing from our cache just
-                # because we weren't filtering on a specific value of state_key?
-                # is it because the cache doesn't handle wildcards?
+                # we mark the type as missing from the cache because
+                # when the cache was populated it might have been done with a
+                # restricted set of state_keys, so the wildcard will not work
+                # and the cache may be incomplete.
                 missing_types.add(key)
             else:
                 if type_to_key.get(typ, object()) is not None:
-- 
cgit 1.4.1


From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Mon, 23 Jul 2018 19:21:20 +0100
Subject: incorporate review

---
 synapse/handlers/sync.py | 67 +++++++++++++++++++-----------------------------
 synapse/storage/state.py | 20 ++++++---------
 2 files changed, 35 insertions(+), 52 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index b597f94cf6..5689ad2f58 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -543,17 +543,6 @@ class SyncHandler(object):
 
                     state_ids = current_state_ids
 
-                # track the membership state events as of the beginning of this
-                # timeline sequence, so they can be filtered out of the state
-                # if we are lazy loading members.
-                if lazy_load_members:
-                    member_state_ids = {
-                        t: state_ids[t]
-                        for t in state_ids if t[0] == EventTypes.Member
-                    }
-                else:
-                    member_state_ids = {}
-
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
                     for event in batch.events if event.is_state()
@@ -562,9 +551,9 @@ class SyncHandler(object):
                 state_ids = _calculate_state(
                     timeline_contains=timeline_state,
                     timeline_start=state_ids,
-                    timeline_start_members=member_state_ids,
                     previous={},
                     current=current_state_ids,
+                    lazy_load_members=lazy_load_members,
                 )
             elif batch.limited:
                 state_at_previous_sync = yield self.get_state_at(
@@ -582,37 +571,27 @@ class SyncHandler(object):
                     filtered_types=filtered_types,
                 )
 
-                # track the membership state events as of the beginning of this
-                # timeline sequence, so they can be filtered out of the state
-                # if we are lazy loading members.
-                if lazy_load_members:
-                    # TODO: optionally filter out redundant membership events at this
-                    # point, to stop repeatedly sending members in every /sync as if
-                    # the client isn't tracking them.
-                    # When implement, this should filter using event_ids (not mxids).
-                    # In practice, limited syncs are
-                    # relatively rare so it's not a total disaster to send redundant
-                    # members down at this point. Redundant members are ones which
-                    # repeatedly get sent down /sync because we don't know if the client
-                    # is caching them or not.
-                    member_state_ids = {
-                        t: state_at_timeline_start[t]
-                        for t in state_at_timeline_start if t[0] == EventTypes.Member
-                    }
-                else:
-                    member_state_ids = {}
-
                 timeline_state = {
                     (event.type, event.state_key): event.event_id
                     for event in batch.events if event.is_state()
                 }
 
+                # TODO: optionally filter out redundant membership events at this
+                # point, to stop repeatedly sending members in every /sync as if
+                # the client isn't tracking them.
+                # When implemented, this should filter using event_ids (not mxids).
+                # In practice, limited syncs are
+                # relatively rare so it's not a total disaster to send redundant
+                # members down at this point. Redundant members are ones which
+                # repeatedly get sent down /sync because we don't know if the client
+                # is caching them or not.
+
                 state_ids = _calculate_state(
                     timeline_contains=timeline_state,
                     timeline_start=state_at_timeline_start,
-                    timeline_start_members=member_state_ids,
                     previous=state_at_previous_sync,
                     current=current_state_ids,
+                    lazy_load_members=lazy_load_members,
                 )
             else:
                 state_ids = {}
@@ -1536,16 +1515,14 @@ def _action_has_highlight(actions):
     return False
 
 
-def _calculate_state(timeline_contains, timeline_start, timeline_start_members,
-                     previous, current):
+def _calculate_state(
+    timeline_contains, timeline_start, previous, current, lazy_load_members,
+):
     """Works out what state to include in a sync response.
 
     Args:
         timeline_contains (dict): state in the timeline
         timeline_start (dict): state at the start of the timeline
-        timeline_start_members (dict): state at the start of the timeline
-            for room members who participate in this chunk of timeline.
-            Should always be a subset of timeline_start.
         previous (dict): state at the end of the previous sync (or empty dict
             if this is an initial sync)
         current (dict): state at the end of the timeline
@@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members,
 
     c_ids = set(e for e in current.values())
     ts_ids = set(e for e in timeline_start.values())
-    tsm_ids = set(e for e in timeline_start_members.values())
     p_ids = set(e for e in previous.values())
     tc_ids = set(e for e in timeline_contains.values())
 
-    state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids
+    # track the membership events in the state as of the start of the timeline
+    # so we can add them back in to the state if we're lazyloading.  We don't
+    # add them into state if they're already contained in the timeline.
+    if lazy_load_members:
+        ll_ids = set(
+            e for t, e in timeline_start.iteritems()
+            if t[0] == EventTypes.Member and e not in tc_ids
+        )
+    else:
+        ll_ids = set()
+
+    state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids
 
     return {
         event_id_to_key[e]: e for e in state_ids
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index f09be7172d..40ca8bd2a2 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore):
 
         Args:
             groups(list[int]): list of state group IDs to query
-            types(list[str|None, str|None])|None: List of 2-tuples of the form
+            types (Iterable[str, str|None]|None): list of 2-tuples of the form
                 (`type`, `state_key`), where a `state_key` of `None` matches all
                 state_keys for the `type`. If None, all types are returned.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
+            filtered_types(Iterable[str]|None): Only apply filtering via `types` to this
                 list of event types.  Other types of events are returned unfiltered.
                 If None, `types` filtering is applied to all events.
 
@@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore):
         for chunk in chunks:
             res = yield self.runInteraction(
                 "_get_state_groups_from_groups",
-                self._get_state_groups_from_groups_txn, chunk, types, filtered_types
+                self._get_state_groups_from_groups_txn, chunk, types, filtered_types,
             )
             results.update(res)
 
         defer.returnValue(results)
 
     def _get_state_groups_from_groups_txn(
-        self, txn, groups, types=None, filtered_types=None
+        self, txn, groups, types=None, filtered_types=None,
     ):
         results = {group: {} for group in groups}
 
-        include_other_types = False if filtered_types is None else True
-
         if types is not None:
             types = list(set(types))  # deduplicate types list
 
@@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore):
                     for etype, state_key in types
                 ]
 
-                if include_other_types:
+                if filtered_types is not None:
                     # XXX: check whether this slows postgres down like a list of
                     # ORs does too?
                     unique_types = set(filtered_types)
@@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore):
                         where_clauses.append("(type = ? AND state_key = ?)")
                         where_args.extend([typ[0], typ[1]])
 
-                if include_other_types:
+                if filtered_types is not None:
                     unique_types = set(filtered_types)
                     where_clauses.append(
                         "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")"
@@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore):
         # tracks which of the requested types are missing from our cache
         missing_types = set()
 
-        include_other_types = False if filtered_types is None else True
-
         for typ, state_key in types:
             key = (typ, state_key)
 
@@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         def include(typ, state_key):
             valid_state_keys = type_to_key.get(typ, sentinel)
             if valid_state_keys is sentinel:
-                return include_other_types and typ not in filtered_types
+                return filtered_types is not None and typ not in filtered_types
             if valid_state_keys is None:
                 return True
             if state_key in valid_state_keys:
@@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         Args:
             groups (iterable[int]): list of state groups for which we want
                 to get the state.
-            types (None|iterable[(None, None|str)]):
+            types (None|iterable[(str, None|str)]):
                 indicates the state type/keys required. If None, the whole
                 state is fetched and returned.
 
-- 
cgit 1.4.1


From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 24 Jul 2018 12:39:40 +0100
Subject: incorporate more review

---
 synapse/handlers/sync.py    | 12 +++++++++---
 synapse/storage/state.py    | 36 +++++++++---------------------------
 tests/storage/test_state.py |  9 +++++++++
 3 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 5689ad2f58..e5a2329d73 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1526,6 +1526,9 @@ def _calculate_state(
         previous (dict): state at the end of the previous sync (or empty dict
             if this is an initial sync)
         current (dict): state at the end of the timeline
+        lazy_load_members (bool): whether to return members from timeline_start
+            or not.  Assumes that timeline_start has already been filtered to
+            include only the members the client needs to know about.
 
     Returns:
         dict
@@ -1545,9 +1548,12 @@ def _calculate_state(
     p_ids = set(e for e in previous.values())
     tc_ids = set(e for e in timeline_contains.values())
 
-    # track the membership events in the state as of the start of the timeline
-    # so we can add them back in to the state if we're lazyloading.  We don't
-    # add them into state if they're already contained in the timeline.
+    # If we are lazyloading room members, we explicitly add the membership events
+    # for the senders in the timeline into the state block returned by /sync,
+    # as we may not have sent them to the client before.  We find these membership
+    # events by filtering them out of timeline_start, which has already been filtered
+    # to only include membership events for the senders in the timeline.
+
     if lazy_load_members:
         ll_ids = set(
             e for t, e in timeline_start.iteritems()
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index f99d3871e4..1413a6f910 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         })
 
     @defer.inlineCallbacks
-    def _get_state_groups_from_groups(self, groups, types, filtered_types=None):
+    def _get_state_groups_from_groups(self, groups, types):
         """Returns the state groups for a given set of groups, filtering on
         types of state events.
 
@@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore):
             types (Iterable[str, str|None]|None): list of 2-tuples of the form
                 (`type`, `state_key`), where a `state_key` of `None` matches all
                 state_keys for the `type`. If None, all types are returned.
-            filtered_types(Iterable[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
 
         Returns:
             dictionary state_group -> (dict of (type, state_key) -> event id)
@@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore):
         for chunk in chunks:
             res = yield self.runInteraction(
                 "_get_state_groups_from_groups",
-                self._get_state_groups_from_groups_txn, chunk, types, filtered_types,
+                self._get_state_groups_from_groups_txn, chunk, types,
             )
             results.update(res)
 
         defer.returnValue(results)
 
     def _get_state_groups_from_groups_txn(
-        self, txn, groups, types=None, filtered_types=None,
+        self, txn, groups, types=None,
     ):
         results = {group: {} for group in groups}
 
@@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore):
                     )
                     for etype, state_key in types
                 ]
-
-                if filtered_types is not None:
-                    # XXX: check whether this slows postgres down like a list of
-                    # ORs does too?
-                    unique_types = set(filtered_types)
-                    clause_to_args.append(
-                        (
-                            "AND type <> ? " * len(unique_types),
-                            list(unique_types)
-                        )
-                    )
             else:
                 # If types is None we fetch all the state, and so just use an
                 # empty where clause with no extra args.
@@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore):
                         where_clauses.append("(type = ? AND state_key = ?)")
                         where_args.extend([typ[0], typ[1]])
 
-                if filtered_types is not None:
-                    unique_types = set(filtered_types)
-                    where_clauses.append(
-                        "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")"
-                    )
-                    where_args.extend(list(unique_types))
-
                 where_clause = "AND (%s)" % (" OR ".join(where_clauses))
             else:
                 where_clause = ""
@@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore):
             # cache. Hence, if we are doing a wildcard lookup, populate the
             # cache fully so that we can do an efficient lookup next time.
 
-            if types and any(k is None for (t, k) in types):
+            if filtered_types or (types and any(k is None for (t, k) in types)):
                 types_to_fetch = None
             else:
                 types_to_fetch = types
 
             group_to_state_dict = yield self._get_state_groups_from_groups(
-                missing_groups, types_to_fetch, filtered_types
+                missing_groups, types_to_fetch
             )
 
             for group, group_state_dict in iteritems(group_to_state_dict):
@@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore):
                 if types:
                     for k, v in iteritems(group_state_dict):
                         (typ, _) = k
-                        if k in types or (typ, None) in types:
+                        if (
+                            (k in types or (typ, None) in types) or
+                            (filtered_types and typ not in filtered_types)
+                        ):
                             state_dict[k] = v
                 else:
                     state_dict.update(group_state_dict)
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index 8924ba9f7f..b2f314e9db 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase):
             (e2.type, e2.state_key): e2,
             (e3.type, e3.state_key): e3,
         }, state)
+
+        state = yield self.store.get_state_for_event(
+            e5.event_id, [], filtered_types=[EventTypes.Member],
+        )
+
+        self.assertStateMapEqual({
+            (e1.type, e1.state_key): e1,
+            (e2.type, e2.state_key): e2,
+        }, state)
-- 
cgit 1.4.1


From eb1d911ab743e85154f7c4b2db8a954d152020dc Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 24 Jul 2018 13:40:49 +0100
Subject: rather than adding ll_ids, remove them from p_ids

---
 synapse/handlers/sync.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index e5a2329d73..1422843af8 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1553,16 +1553,17 @@ def _calculate_state(
     # as we may not have sent them to the client before.  We find these membership
     # events by filtering them out of timeline_start, which has already been filtered
     # to only include membership events for the senders in the timeline.
+    # In practice, we can do this by removing them from the p_ids list.
+    # see https://github.com/matrix-org/synapse/pull/2970
+    #            /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809
 
     if lazy_load_members:
-        ll_ids = set(
+        p_ids.difference_update(
             e for t, e in timeline_start.iteritems()
-            if t[0] == EventTypes.Member and e not in tc_ids
+            if t[0] == EventTypes.Member
         )
-    else:
-        ll_ids = set()
 
-    state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids
+    state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
 
     return {
         event_id_to_key[e]: e for e in state_ids
-- 
cgit 1.4.1


From 1a01a5b964d3ea373355684a91b9f7fd95726fbc Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 24 Jul 2018 14:03:15 +0100
Subject: clarify comment on p_ids

---
 synapse/handlers/sync.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'synapse/handlers/sync.py')

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 1422843af8..4ced3144c8 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1553,7 +1553,8 @@ def _calculate_state(
     # as we may not have sent them to the client before.  We find these membership
     # events by filtering them out of timeline_start, which has already been filtered
     # to only include membership events for the senders in the timeline.
-    # In practice, we can do this by removing them from the p_ids list.
+    # In practice, we can do this by removing them from the p_ids set,
+    # which is the set of relevant state we know we have already sent to the client.
     # see https://github.com/matrix-org/synapse/pull/2970
     #            /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809
 
-- 
cgit 1.4.1