summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
author Erik Johnston <erikj@jki.re> 2016-08-03 14:10:36 +0100
committer GitHub <noreply@github.com> 2016-08-03 14:10:36 +0100
commit f5da3bacb22297f48779d5a4621d45abec40e258 (patch)
tree 7e4b47036793df5c75788d7e3a72cf40a17c377f /synapse/storage
parent Print status code in federation_client.py (diff)
parent Remove other bit of deduplication (diff)
download synapse-f5da3bacb22297f48779d5a4621d45abec40e258.tar.xz
Merge pull request #975 from matrix-org/erikj/multi_event_persist
Ensure we only persist an event once at a time
Diffstat (limited to 'synapse/storage')
-rw-r--r-- synapse/storage/events.py 38
1 files changed, 18 insertions, 20 deletions
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index c63ca36df6..4664cfe6d9 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -26,7 +26,7 @@ from synapse.api.constants import EventTypes
 from synapse.api.errors import SynapseError
 
 from canonicaljson import encode_canonical_json
-from collections import deque, namedtuple
+from collections import deque, namedtuple, OrderedDict
 
 import synapse
 import synapse.metrics
@@ -403,6 +403,23 @@ class EventsStore(SQLBaseStore):
         and the rejections table. Things reading from those table will need to check
         whether the event was rejected.
         """
+        # Ensure that we don't have the same event twice.
+        # Pick the earliest non-outlier if there is one, else the earliest one.
+        new_events_and_contexts = OrderedDict()
+        for event, context in events_and_contexts:
+            prev_event_context = new_events_and_contexts.get(event.event_id)
+            if prev_event_context:
+                if not event.internal_metadata.is_outlier():
+                    if prev_event_context[0].internal_metadata.is_outlier():
+                        # To ensure correct ordering we pop, as OrderedDict is
+                        # ordered by first insertion.
+                        new_events_and_contexts.pop(event.event_id, None)
+                        new_events_and_contexts[event.event_id] = (event, context)
+            else:
+                new_events_and_contexts[event.event_id] = (event, context)
+
+        events_and_contexts = new_events_and_contexts.values()
+
         depth_updates = {}
         for event, context in events_and_contexts:
             # Remove the any existing cache entries for the event_ids
@@ -433,8 +450,6 @@ class EventsStore(SQLBaseStore):
             for event_id, outlier in txn.fetchall()
         }
 
-        # Remove the events that we've seen before.
-        event_map = {}
         to_remove = set()
         for event, context in events_and_contexts:
             if context.rejected:
@@ -445,23 +460,6 @@ class EventsStore(SQLBaseStore):
                     to_remove.add(event)
                 continue
 
-            # Handle the case of the list including the same event multiple
-            # times. The tricky thing here is when they differ by whether
-            # they are an outlier.
-            if event.event_id in event_map:
-                other = event_map[event.event_id]
-
-                if not other.internal_metadata.is_outlier():
-                    to_remove.add(event)
-                    continue
-                elif not event.internal_metadata.is_outlier():
-                    to_remove.add(event)
-                    continue
-                else:
-                    to_remove.add(other)
-
-            event_map[event.event_id] = event
-
             if event.event_id not in have_persisted:
                 continue