Avoid creating events with huge numbers of prev_events

In most cases, we limit the number of prev_events for a given event to 10 events. This fixes a particular code path which created events with huge numbers of prev_events.
author: Richard van der Hoff <richard@matrix.org> 2018-04-16 18:41:37 +0100
committer: Richard van der Hoff <richard@matrix.org> 2018-04-16 18:41:37 +0100
commit: 639480e14a06723adf6817ddd2b2ff9e4f4cdf2a (patch)
tree: 5ab5ab8b86450776a7ee918bfd1cf73a9e48da6d /synapse/storage
parent: fix spurious changelog dup (diff)
download: synapse-639480e14a06723adf6817ddd2b2ff9e4f4cdf2a.tar.xz
1 files changed, 41 insertions, 16 deletions
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 00ee82d300..a183fc6b50 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import random
 
 from twisted.internet import defer
 
@@ -133,7 +134,47 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
             retcol="event_id",
         )
 
+    @defer.inlineCallbacks
+    def get_prev_events_for_room(self, room_id):
+        """
+        Gets a subset of the current forward extremities in the given room.
+
+        Limits the result to 10 extremities, so that we can avoid creating
+        events which refer to hundreds of prev_events.
+
+        Args:
+            room_id (str): room_id
+
+        Returns:
+            Deferred[list[(str, dict[str, str], int)]]
+                for each event, a tuple of (event_id, hashes, depth)
+                where *hashes* is a map from algorithm to hash.
+        """
+        res = yield self.get_latest_event_ids_and_hashes_in_room(room_id)
+        if len(res) > 10:
+            # Sort by reverse depth, so we point to the most recent.
+            res.sort(key=lambda a: -a[2])
+
+            # we use half of the limit for the actual most recent events, and
+            # the other half to randomly point to some of the older events, to
+            # make sure that we don't completely ignore the older events.
+            res = res[0:5] + random.sample(res[5:], 5)
+
+        defer.returnValue(res)
+
     def get_latest_event_ids_and_hashes_in_room(self, room_id):
+        """
+        Gets the current forward extremities in the given room
+
+        Args:
+            room_id (str): room_id
+
+        Returns:
+            Deferred[list[(str, dict[str, str], int)]]
+                for each event, a tuple of (event_id, hashes, depth)
+                where *hashes* is a map from algorithm to hash.
+        """
+
         return self.runInteraction(
             "get_latest_event_ids_and_hashes_in_room",
             self._get_latest_event_ids_and_hashes_in_room,
@@ -182,22 +223,6 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
             room_id,
         )
 
-    @defer.inlineCallbacks
-    def get_max_depth_of_events(self, event_ids):
-        sql = (
-            "SELECT MAX(depth) FROM events WHERE event_id IN (%s)"
-        ) % (",".join(["?"] * len(event_ids)),)
-
-        rows = yield self._execute(
-            "get_max_depth_of_events", None,
-            sql, *event_ids
-        )
-
-        if rows:
-            defer.returnValue(rows[0][0])
-        else:
-            defer.returnValue(1)
-
     def _get_min_depth_interaction(self, txn, room_id):
         min_depth = self._simple_select_one_onecol_txn(
             txn,
author	Richard van der Hoff <richard@matrix.org>	2018-04-16 18:41:37 +0100
committer	Richard van der Hoff <richard@matrix.org>	2018-04-16 18:41:37 +0100
commit	639480e14a06723adf6817ddd2b2ff9e4f4cdf2a (patch)
tree	5ab5ab8b86450776a7ee918bfd1cf73a9e48da6d /synapse/storage
parent	fix spurious changelog dup (diff)
download	synapse-639480e14a06723adf6817ddd2b2ff9e4f4cdf2a.tar.xz