summary refs log tree commit diff
diff options
context:
space:
mode:
authorMark Haines <mark.haines@matrix.org>2015-01-13 14:14:21 +0000
committerMark Haines <mark.haines@matrix.org>2015-01-13 14:38:53 +0000
commit895fcb377e3ebc43d67df1ac66413d92aacffca1 (patch)
treed15af4605c65b7f9718cc7cbe85829c92bc048ac
parentMerge branch 'hotfixes-v0.6.1b' of github.com:matrix-org/synapse into develop (diff)
downloadsynapse-895fcb377e3ebc43d67df1ac66413d92aacffca1.tar.xz
Fix stream token ordering
-rw-r--r--synapse/storage/stream.py173
1 files changed, 101 insertions, 72 deletions
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index 744c821dfe..563c8e3bbb 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -39,6 +39,8 @@ from ._base import SQLBaseStore
 from synapse.api.errors import SynapseError
 from synapse.util.logutils import log_function
 
+from collections import namedtuple
+
 import logging
 
 
@@ -52,58 +54,76 @@ _STREAM_TOKEN = "stream"
 _TOPOLOGICAL_TOKEN = "topological"
 
 
-def _parse_stream_token(string):
-    try:
-        if string[0] != 's':
-            raise
-        return int(string[1:])
-    except:
-        raise SynapseError(400, "Invalid token")
-
-
-def _parse_topological_token(string):
-    try:
-        if string[0] != 't':
-            raise
-        parts = string[1:].split('-', 1)
-        return (int(parts[0]), int(parts[1]))
-    except:
-        raise SynapseError(400, "Invalid token")
-
-
-def is_stream_token(string):
-    try:
-        _parse_stream_token(string)
-        return True
-    except:
-        return False
-
-
-def is_topological_token(string):
-    try:
-        _parse_topological_token(string)
-        return True
-    except:
-        return False
-
-
-def _get_token_bound(token, comparison):
-    try:
-        s = _parse_stream_token(token)
-        return "%s %s %d" % ("stream_ordering", comparison, s)
-    except:
-        pass
-
-    try:
-        top, stream = _parse_topological_token(token)
-        return "%s %s %d AND %s %s %d" % (
-            "topological_ordering", comparison, top,
-            "stream_ordering", comparison, stream,
-        )
-    except:
-        pass
+class _StreamToken(namedtuple("_StreamToken", "topological stream")):
+    """Tokens are positions between events. The token "s1" comes after event 1.
+
+            s0    s1
+            |     |
+        [0] V [1] V [2]
+
+    Tokens can either be a point in the live event stream or a cursor going
+    through historic events.
+
+    When traversing the live event stream events are ordered by when they
+    arrived at the homeserver.
+
+    When traversing historic events the events are ordered by their depth in
+    the event graph "topological_ordering" and then by when they arrived at the
+    homeserver "stream_ordering".
+
+    Live tokens start with an "s" followed by the "stream_ordering" id of the
+    event it comes after. Historic tokens start with a "t" followed by the
+    "topological_ordering" id of the event it comes after, follewed by "-",
+    followed by the "stream_ordering" id of the event it comes after.
+    """
+    __slots__ = []
+
+    @classmethod
+    def parse(cls, string):
+        try:
+            if string[0] == 's':
+                return cls(None, int(string[1:]))
+            if string[0] == 't':
+                parts = string[1:].split('-', 1)
+                return cls(int(parts[1]), int(parts[0]))
+        except:
+            pass
+        raise SynapseError(400, "Invalid token %r" % (string,))
+
+    @classmethod
+    def parse_stream_token(cls, string):
+        try:
+            if string[0] == 's':
+                return cls(None, int(string[1:]))
+        except:
+            pass
+        raise SynapseError(400, "Invalid token %r" % (string,))
+
+    def __str__(self):
+        if self.topological is not None:
+            return "t%d-%d" % (self.topological, self.stream)
+        else:
+            return "s%d" % (self.stream,)
+
+    def lower_bound(self):
+        if self.topological is None:
+            return "(%d < %s)" % (self.stream, "stream_ordering")
+        else:
+            return "(%d < %s OR (%d == %s AND %d < %s))" % (
+                self.topological, "topological_ordering",
+                self.topological, "topological_ordering",
+                self.stream, "stream_ordering",
+            )
 
-    raise SynapseError(400, "Invalid token")
+    def upper_bound(self):
+        if self.topological is None:
+            return "(%d >= %s)" % (self.stream, "stream_ordering")
+        else:
+            return "(%d > %s OR (%d == %s AND %d >= %s))" % (
+                self.topological, "topological_ordering",
+                self.topological, "topological_ordering",
+                self.stream, "stream_ordering",
+            )
 
 
 class StreamStore(SQLBaseStore):
@@ -162,8 +182,8 @@ class StreamStore(SQLBaseStore):
             limit = MAX_STREAM_SIZE
 
         # From and to keys should be integers from ordering.
-        from_id = _parse_stream_token(from_key)
-        to_id = _parse_stream_token(to_key)
+        from_id = _StreamToken.parse_stream_token(from_key)
+        to_id = _StreamToken.parse_stream_token(to_key)
 
         if from_key == to_key:
             return defer.succeed(([], to_key))
@@ -181,7 +201,7 @@ class StreamStore(SQLBaseStore):
         }
 
         def f(txn):
-            txn.execute(sql, (user_id, user_id, from_id, to_id,))
+            txn.execute(sql, (user_id, user_id, from_id.stream, to_id.stream,))
 
             rows = self.cursor_to_dict(txn)
 
@@ -211,17 +231,21 @@ class StreamStore(SQLBaseStore):
         # Tokens really represent positions between elements, but we use
         # the convention of pointing to the event before the gap. Hence
         # we have a bit of asymmetry when it comes to equalities.
-        from_comp = '<=' if direction == 'b' else '>'
-        to_comp = '>' if direction == 'b' else '<='
-        order = "DESC" if direction == 'b' else "ASC"
-
         args = [room_id]
-
-        bounds = _get_token_bound(from_key, from_comp)
-        if to_key:
-            bounds = "%s AND %s" % (
-                bounds, _get_token_bound(to_key, to_comp)
-            )
+        if direction == 'b':
+            order = "DESC"
+            bounds = _StreamToken.parse(from_key).upper_bound()
+            if to_key:
+                bounds = "%s AND %s" % (
+                    bounds, _StreamToken.parse(to_key).lower_bound()
+                )
+        else:
+            order = "ASC"
+            bounds = _StreamToken.parse(from_key).lower_bound()
+            if to_key:
+                bounds = "%s AND %s" % (
+                    bounds, _StreamToken.parse(to_key).upper_bound()
+                )
 
         if int(limit) > 0:
             args.append(int(limit))
@@ -249,9 +273,13 @@ class StreamStore(SQLBaseStore):
                 topo = rows[-1]["topological_ordering"]
                 toke = rows[-1]["stream_ordering"]
                 if direction == 'b':
-                    topo -= 1
+                    # Tokens are positions between events.
+                    # This token points *after* the last event in the chunk.
+                    # We need it to point to the event before it in the chunk
+                    # when we are going backwards so we subtract one from the
+                    # stream part.
                     toke -= 1
-                next_token = "t%s-%s" % (topo, toke)
+                next_token = str(_StreamToken(topo, toke))
             else:
                 # TODO (erikj): We should work out what to do here instead.
                 next_token = to_key if to_key else from_key
@@ -284,13 +312,14 @@ class StreamStore(SQLBaseStore):
             rows.reverse()  # As we selected with reverse ordering
 
             if rows:
-                # XXX: Always subtract 1 since the start token always goes
-                # backwards (parity with paginate_room_events). It isn't
-                # obvious that this is correct; we should clarify the algorithm
-                # used here.
-                topo = rows[0]["topological_ordering"] - 1
+                # Tokens are positions between events.
+                # This token points *after* the last event in the chunk.
+                # We need it to point to the event before it in the chunk
+                # since we are going backwards so we subtract one from the
+                # stream part.
+                topo = rows[0]["topological_ordering"]
                 toke = rows[0]["stream_ordering"] - 1
-                start_token = "t%s-%s" % (topo, toke)
+                start_token = str(_StreamToken(topo, toke))
 
                 token = (start_token, end_token)
             else: