author     Erik Johnston <erik@matrix.org>  2014-08-19 14:19:48 +0100
committer  Erik Johnston <erik@matrix.org>  2014-08-19 14:19:48 +0100
commit     598a1d8ff953c70f9f54564225d693a1bcf42144 (patch)
tree       dc5304ebe26c47bcd74ce3df66abb2d3a366bcc7
parent     Fix typos in SQL and where we still had rowid's (which no longer exist) (diff)
download   synapse-598a1d8ff953c70f9f54564225d693a1bcf42144.tar.xz
Change the way pagination works to support out of order events.
-rw-r--r--  synapse/api/streams/__init__.py                      |  19
-rw-r--r--  synapse/api/streams/event.py                         |  92
-rw-r--r--  synapse/handlers/presence.py                         |   2
-rw-r--r--  synapse/handlers/room.py                             |   3
-rw-r--r--  synapse/storage/schema/im.sql                        |   2
-rw-r--r--  synapse/storage/stream.py                            | 186
-rw-r--r--  webclient/components/matrix/event-stream-service.js  |   1
-rw-r--r--  webclient/components/matrix/matrix-service.js        |   4

8 files changed, 227 insertions, 82 deletions
diff --git a/synapse/api/streams/__init__.py b/synapse/api/streams/__init__.py
index 989e63f9ec..44f4cc6078 100644
--- a/synapse/api/streams/__init__.py
+++ b/synapse/api/streams/__init__.py
@@ -20,23 +20,23 @@ class PaginationConfig(object):
 
     """A configuration object which stores pagination parameters."""
 
-    def __init__(self, from_tok=None, to_tok=None, limit=0):
+    def __init__(self, from_tok=None, to_tok=None, direction='f', limit=0):
         self.from_tok = from_tok
         self.to_tok = to_tok
+        self.direction = direction
         self.limit = limit
 
     @classmethod
     def from_request(cls, request, raise_invalid_params=True):
         params = {
-            "from_tok": PaginationStream.TOK_START,
-            "to_tok": PaginationStream.TOK_END,
-            "limit": 0
+            "direction": 'f',
         }
 
         query_param_mappings = [  # 3-tuple of qp_key, attribute, rules
             ("from", "from_tok", lambda x: type(x) == str),
             ("to", "to_tok", lambda x: type(x) == str),
-            ("limit", "limit", lambda x: x.isdigit())
+            ("limit", "limit", lambda x: x.isdigit()),
+            ("dir", "direction", lambda x: x == 'f' or x == 'b'),
         ]
 
         for qp, attr, is_valid in query_param_mappings:
@@ -48,12 +48,17 @@ class PaginationConfig(object):
 
         return PaginationConfig(**params)
 
+    def __str__(self):
+        return (
+            "<PaginationConfig from_tok=%s, to_tok=%s, "
+            "direction=%s, limit=%s>"
+        ) % (self.from_tok, self.to_tok, self.direction, self.limit)
+
 
 class PaginationStream(object):
 
     """ An interface for streaming data as chunks. """
 
-    TOK_START = "START"
     TOK_END = "END"
 
     def get_chunk(self, config=None):
@@ -76,7 +81,7 @@ class StreamData(object):
         self.hs = hs
         self.store = hs.get_datastore()
 
-    def get_rows(self, user_id, from_pkey, to_pkey, limit):
+    def get_rows(self, user_id, from_pkey, to_pkey, limit, direction):
         """ Get event stream data between the specified pkeys.
 
         Args:
diff --git a/synapse/api/streams/event.py b/synapse/api/streams/event.py
index 414b05be30..a5c8b2b31f 100644
--- a/synapse/api/streams/event.py
+++ b/synapse/api/streams/event.py
@@ -38,8 +38,8 @@ class EventsStreamData(StreamData):
         self.with_feedback = feedback
 
     @defer.inlineCallbacks
-    def get_rows(self, user_id, from_key, to_key, limit):
-        data, latest_ver = yield self.store.get_room_events_stream(
+    def get_rows(self, user_id, from_key, to_key, limit, direction):
+        data, latest_ver = yield self.store.get_room_events(
             user_id=user_id,
             from_key=from_key,
             to_key=to_key,
@@ -70,6 +70,15 @@ class EventStream(PaginationStream):
             pagination_config.from_tok)
         pagination_config.to_tok = yield self.fix_token(
             pagination_config.to_tok)
+
+        if (
+            not pagination_config.to_tok
+            and pagination_config.direction == 'f'
+        ):
+            pagination_config.to_tok = yield self.get_current_max_token()
+
+        logger.debug("pagination_config: %s", pagination_config)
+
         defer.returnValue(pagination_config)
 
     @defer.inlineCallbacks
@@ -81,39 +90,42 @@ class EventStream(PaginationStream):
         Returns:
             The fixed-up token, which may == token.
         """
-        # replace TOK_START and TOK_END with 0_0_0 or -1_-1_-1 depending.
-        replacements = [
-            (PaginationStream.TOK_START, "0"),
-            (PaginationStream.TOK_END, "-1")
-        ]
-        for magic_token, key in replacements:
-            if magic_token == token:
-                token = EventStream.SEPARATOR.join(
-                    [key] * len(self.stream_data)
-                )
-
-        # replace -1 values with an actual pkey
-        token_segments = self._split_token(token)
-        for i, tok in enumerate(token_segments):
-            if tok == -1:
-                # add 1 to the max token because results are EXCLUSIVE from the
-                # latest version.
-                token_segments[i] = 1 + (yield self.stream_data[i].max_token())
-        defer.returnValue(EventStream.SEPARATOR.join(
-            str(x) for x in token_segments
-        ))
+        if token == PaginationStream.TOK_END:
+            new_token = yield self.get_current_max_token()
+
+            logger.debug("fix_token: From %s to %s", token, new_token)
+
+            token = new_token
+
+        defer.returnValue(token)
 
     @defer.inlineCallbacks
-    def get_chunk(self, config=None):
+    def get_current_max_token(self):
+        new_token_parts = []
+        for s in self.stream_data:
+            mx = yield s.max_token()
+            new_token_parts.append(str(mx))
+
+        new_token = EventStream.SEPARATOR.join(new_token_parts)
+
+        logger.debug("get_current_max_token: %s", new_token)
+
+        defer.returnValue(new_token)
+
+    @defer.inlineCallbacks
+    def get_chunk(self, config):
         # no support for limit on >1 streams, makes no sense.
         if config.limit and len(self.stream_data) > 1:
             raise EventStreamError(
                 400, "Limit not supported on multiplexed streams."
             )
 
-        (chunk_data, next_tok) = yield self._get_chunk_data(config.from_tok,
-                                                            config.to_tok,
-                                                            config.limit)
+        chunk_data, next_tok = yield self._get_chunk_data(
+            config.from_tok,
+            config.to_tok,
+            config.limit,
+            config.direction,
+        )
 
         defer.returnValue({
             "chunk": chunk_data,
@@ -122,7 +134,7 @@ class EventStream(PaginationStream):
         })
 
     @defer.inlineCallbacks
-    def _get_chunk_data(self, from_tok, to_tok, limit):
+    def _get_chunk_data(self, from_tok, to_tok, limit, direction):
         """ Get event data between the two tokens.
 
         Tokens are SEPARATOR separated values representing pkey values of
@@ -140,11 +152,12 @@ class EventStream(PaginationStream):
             EventStreamError if something went wrong.
         """
         # sanity check
-        if (from_tok.count(EventStream.SEPARATOR) !=
-                to_tok.count(EventStream.SEPARATOR) or
-                (from_tok.count(EventStream.SEPARATOR) + 1) !=
-                len(self.stream_data)):
-            raise EventStreamError(400, "Token lengths don't match.")
+        if to_tok is not None:
+            if (from_tok.count(EventStream.SEPARATOR) !=
+                    to_tok.count(EventStream.SEPARATOR) or
+                    (from_tok.count(EventStream.SEPARATOR) + 1) !=
+                    len(self.stream_data)):
+                raise EventStreamError(400, "Token lengths don't match.")
 
         chunk = []
         next_ver = []
@@ -158,7 +171,7 @@ class EventStream(PaginationStream):
                 continue
 
             (event_chunk, max_pkey) = yield self.stream_data[i].get_rows(
-                self.user_id, from_pkey, to_pkey, limit
+                self.user_id, from_pkey, to_pkey, limit, direction,
             )
 
             chunk.extend([
@@ -177,9 +190,8 @@ class EventStream(PaginationStream):
         Returns:
             A list of ints.
         """
-        segments = token.split(EventStream.SEPARATOR)
-        try:
-            int_segments = [int(x) for x in segments]
-        except ValueError:
-            raise EventStreamError(400, "Bad token: %s" % token)
-        return int_segments
+        if token:
+            segments = token.split(EventStream.SEPARATOR)
+        else:
+            segments = [None] * len(self.stream_data)
+        return segments
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index e8cb83eddb..f140dc527a 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -684,7 +684,7 @@ class PresenceStreamData(StreamData):
         super(PresenceStreamData, self).__init__(hs)
         self.presence = hs.get_handlers().presence_handler
 
-    def get_rows(self, user_id, from_key, to_key, limit):
+    def get_rows(self, user_id, from_key, to_key, limit, direction):
         cachemap = self.presence._user_cachemap
 
         # TODO(paul): limit, and filter by visibility
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index e5dd2e245f..40867ae2e0 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -466,7 +466,7 @@ class RoomMemberHandler(BaseHandler):
             for entry in member_list
         ]
         chunk_data = {
-            "start": "START",
+            "start": "START",  # FIXME (erikj): START is no longer a valid value
             "end": "END",
             "chunk": event_list
         }
@@ -811,4 +811,5 @@ class RoomListHandler(BaseHandler):
     @defer.inlineCallbacks
     def get_public_room_list(self):
         chunk = yield self.store.get_rooms(is_public=True)
+        # FIXME (erikj): START is no longer a valid value
         defer.returnValue({"start": "START", "end": "END", "chunk": chunk})
diff --git a/synapse/storage/schema/im.sql b/synapse/storage/schema/im.sql
index 0fb3dbee55..ea04261ff0 100644
--- a/synapse/storage/schema/im.sql
+++ b/synapse/storage/schema/im.sql
@@ -14,7 +14,7 @@
  */
 
 CREATE TABLE IF NOT EXISTS events(
-    token_ordering INTEGER PRIMARY KEY AUTOINCREMENT,
+    stream_ordering INTEGER PRIMARY KEY AUTOINCREMENT,
     topological_ordering INTEGER NOT NULL,
     event_id TEXT NOT NULL,
     type TEXT NOT NULL,
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index c03c983e14..87fc978813 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -16,8 +16,9 @@
 from twisted.internet import defer
 
 from ._base import SQLBaseStore
-
+from synapse.api.errors import SynapseError
 from synapse.api.constants import Membership
+from synapse.util.logutils import log_function
 
 import json
 import logging
@@ -29,9 +30,96 @@ logger = logging.getLogger(__name__)
 
 MAX_STREAM_SIZE = 1000
 
+_STREAM_TOKEN = "stream"
+_TOPOLOGICAL_TOKEN = "topological"
+
+
+def _parse_stream_token(string):
+    try:
+        if string[0] != 's':
+            raise
+        return int(string[1:])
+    except:
+        logger.debug("Not stream token: %s", string)
+        raise SynapseError(400, "Invalid token")
+
+
+def _parse_topological_token(string):
+    try:
+        if string[0] != 't':
+            raise
+        parts = string[1:].split('-', 1)
+        return (int(parts[0]), int(parts[1]))
+    except:
+        logger.debug("Not topological token: %s", string)
+        raise SynapseError(400, "Invalid token")
+
+
+def is_stream_token(string):
+    try:
+        _parse_stream_token(string)
+        return True
+    except:
+        return False
+
+
+def is_topological_token(string):
+    try:
+        _parse_topological_token(string)
+        return True
+    except:
+        return False
+
+
+def _get_token_bound(token, comparison):
+    try:
+        s = _parse_stream_token(token)
+        return "%s %s %d" % ("stream_ordering", comparison, s)
+    except:
+        pass
+
+    try:
+        top, stream = _parse_topological_token(token)
+        return "%s %s %d AND %s %s %d" % (
+            "topological_ordering", comparison, top,
+            "stream_ordering", comparison, stream,
+        )
+    except:
+        pass
+
+    raise SynapseError(400, "Invalid token")
+
+
 class StreamStore(SQLBaseStore):
+    @log_function
+    def get_room_events(self, user_id, from_key, to_key, room_id, limit=0,
+                        direction='f', with_feedback=False):
+        is_events = (
+            direction == 'f'
+            and is_stream_token(from_key)
+            and to_key and is_stream_token(to_key)
+        )
+
+        if is_events:
+            return self.get_room_events_stream(
+                user_id=user_id,
+                from_key=from_key,
+                to_key=to_key,
+                room_id=room_id,
+                limit=limit,
+                with_feedback=with_feedback,
+            )
+        else:
+            return self.paginate_room_events(
+                from_key=from_key,
+                to_key=to_key,
+                room_id=room_id,
+                limit=limit,
+                with_feedback=with_feedback,
+            )
 
     @defer.inlineCallbacks
+    @log_function
     def get_room_events_stream(self, user_id, from_key, to_key, room_id,
                                limit=0, with_feedback=False):
         # TODO (erikj): Handle compressed feedback
@@ -54,8 +142,8 @@ class StreamStore(SQLBaseStore):
             limit = MAX_STREAM_SIZE
 
         # From and to keys should be integers from ordering.
-        from_key = int(from_key)
-        to_key = int(to_key)
+        from_id = _parse_stream_token(from_key)
+        to_id = _parse_stream_token(to_key)
 
         if from_key == to_key:
             defer.returnValue(([], to_key))
@@ -65,42 +153,79 @@ class StreamStore(SQLBaseStore):
             "SELECT * FROM events as e WHERE "
             "((room_id IN (%(current)s)) OR "
             "(event_id IN (%(invites)s))) "
+            "AND e.stream_ordering > ? AND e.stream_ordering < ? "
+            "ORDER BY stream_ordering ASC LIMIT %(limit)d "
         ) % {
             "current": current_room_membership_sql,
             "invites": invites_sql,
+            "limit": limit
         }
 
-        # Constraints and ordering depend on direction.
-        if from_key < to_key:
-            sql += (
-                "AND e.token_ordering > ? AND e.token_ordering < ? "
-                "ORDER BY token_ordering ASC LIMIT %(limit)d "
-            ) % {"limit": limit}
-        else:
-            sql += (
-                "AND e.token_ordering < ? "
-                "AND e.token_ordering > ? "
-                "ORDER BY e.token_ordering DESC LIMIT %(limit)d "
-            ) % {"limit": int(limit)}
-
         rows = yield self._execute_and_decode(
             sql,
-            user_id, user_id, Membership.INVITE, from_key, to_key
+            user_id, user_id, Membership.INVITE, from_id, to_id
         )
 
         ret = [self._parse_event_from_row(r) for r in rows]
 
         if rows:
-            if from_key < to_key:
-                key = max([r["token_ordering"] for r in rows])
-            else:
-                key = min([r["token_ordering"] for r in rows])
+            key = "s%d" % max([r["stream_ordering"] for r in rows])
         else:
+            # Assume we didn't get anything because there was nothing to get.
             key = to_key
 
         defer.returnValue((ret, key))
 
     @defer.inlineCallbacks
+    @log_function
+    def paginate_room_events(self, room_id, from_key, to_key=None,
+                             direction='b', limit=-1,
+                             with_feedback=False):
+        # TODO (erikj): Handle compressed feedback
+
+        from_comp = '<' if direction == 'b' else '>'
+        to_comp = '>' if direction == 'b' else '<'
+        order = "DESC" if direction == 'b' else "ASC"
+
+        args = [room_id]
+
+        bounds = _get_token_bound(from_key, from_comp)
+        if to_key:
+            bounds = "%s AND %s" % (bounds, _get_token_bound(to_key, to_comp))
+
+        if int(limit) > 0:
+            args.append(int(limit))
+            limit_str = " LIMIT ?"
+        else:
+            limit_str = ""
+
+        sql = (
+            "SELECT * FROM events "
+            "WHERE room_id = ? AND %(bounds)s "
+            "ORDER BY topological_ordering %(order)s, stream_ordering %(order)s %(limit)s "
+        ) % {"bounds": bounds, "order": order, "limit": limit_str}
+
+        rows = yield self._execute_and_decode(
+            sql,
+            *args
+        )
+
+        if rows:
+            topo = rows[-1]["topological_ordering"]
+            toke = rows[-1]["stream_ordering"]
+            next_token = "t%s-%s" % (topo, toke)
+        else:
+            # TODO (erikj): We should work out what to do here instead.
+            next_token = to_key if to_key else from_key
+
+        defer.returnValue(
+            (
+                [self._parse_event_from_row(r) for r in rows],
+                next_token
+            )
+        )
+
+    @defer.inlineCallbacks
     def get_recent_events_for_room(self, room_id, limit, with_feedback=False):
         # TODO (erikj): Handle compressed feedback
@@ -108,8 +233,8 @@ class StreamStore(SQLBaseStore):
 
         sql = (
             "SELECT * FROM events "
-            "WHERE room_id = ? AND token_ordering <= ? "
-            "ORDER BY topological_ordering, token_ordering DESC LIMIT ? "
+            "WHERE room_id = ? AND stream_ordering <= ? "
+            "ORDER BY topological_ordering, stream_ordering DESC LIMIT ? "
         )
 
         rows = yield self._execute_and_decode(
@@ -121,12 +246,12 @@ class StreamStore(SQLBaseStore):
 
         if rows:
             topo = rows[0]["topological_ordering"]
-            toke = rows[0]["token_ordering"]
+            toke = rows[0]["stream_ordering"]
             start_token = "p%s-%s" % (topo, toke)
 
             token = (start_token, end_token)
         else:
-            token = ("START", end_token)
+            token = (end_token, end_token)
 
         defer.returnValue(
             (
@@ -138,11 +263,14 @@ class StreamStore(SQLBaseStore):
     @defer.inlineCallbacks
     def get_room_events_max_id(self):
         res = yield self._execute_and_decode(
-            "SELECT MAX(token_ordering) as m FROM events"
+            "SELECT MAX(stream_ordering) as m FROM events"
         )
 
-        if not res:
-            defer.returnValue(0)
+        logger.debug("get_room_events_max_id: %s", res)
+
+        if not res or not res[0] or not res[0]["m"]:
+            defer.returnValue("s1")
             return
 
-        defer.returnValue(res[0]["m"])
+        key = res[0]["m"] + 1
+        defer.returnValue("s%d" % (key,))
diff --git a/webclient/components/matrix/event-stream-service.js b/webclient/components/matrix/event-stream-service.js
index c94cf0fe72..a446fad5d4 100644
--- a/webclient/components/matrix/event-stream-service.js
+++ b/webclient/components/matrix/event-stream-service.js
@@ -25,7 +25,6 @@ the eventHandlerService.
 angular.module('eventStreamService', [])
 .factory('eventStreamService', ['$q', '$timeout', 'matrixService', 'eventHandlerService',
 function($q, $timeout, matrixService, eventHandlerService) {
     var END = "END";
-    var START = "START";
 
     var TIMEOUT_MS = 30000;
     var ERR_TIMEOUT_MS = 5000;
diff --git a/webclient/components/matrix/matrix-service.js b/webclient/components/matrix/matrix-service.js
index c52c94c310..3cd0aa674b 100644
--- a/webclient/components/matrix/matrix-service.js
+++ b/webclient/components/matrix/matrix-service.js
@@ -230,8 +230,8 @@ angular.module('matrixService', [])
         path = path.replace("$room_id", room_id);
         var params = {
             from: from_token,
-            to: "START",
-            limit: limit
+            limit: limit,
+            dir: 'b'
         };
         return doRequest("GET", path, params);
     },
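
Editor's note: the diff above replaces the old integer `token_ordering` tokens with two string formats: stream tokens of the form `s<stream_ordering>` (arrival order, used when streaming forwards with `dir=f`) and topological tokens of the form `t<topological_ordering>-<stream_ordering>` (returned when paginating, so events received out of order are still walked in topological order). Below is a minimal, self-contained sketch of that scheme; it mirrors the `_parse_stream_token`, `_parse_topological_token`, and `_get_token_bound` helpers added to `synapse/storage/stream.py`, but is an illustration, not the module itself.

```python
# Illustrative sketch of the token scheme this commit introduces
# (standalone; not the actual synapse code).

def parse_stream_token(string):
    # e.g. "s26" -> 26
    if not string.startswith("s"):
        raise ValueError("Not a stream token: %r" % (string,))
    return int(string[1:])


def parse_topological_token(string):
    # e.g. "t12-26" -> (12, 26)
    if not string.startswith("t"):
        raise ValueError("Not a topological token: %r" % (string,))
    topo, _, stream = string[1:].partition("-")
    return int(topo), int(stream)


def token_bound(token, comparison):
    # Mirrors _get_token_bound: turn either token form into a SQL
    # WHERE-clause fragment using the given comparison operator.
    try:
        return "stream_ordering %s %d" % (comparison, parse_stream_token(token))
    except ValueError:
        topo, stream = parse_topological_token(token)
        return "topological_ordering %s %d AND stream_ordering %s %d" % (
            comparison, topo, comparison, stream,
        )


if __name__ == "__main__":
    # Paginating backwards ('b') bounds both orderings with '<':
    print(token_bound("t12-26", "<"))
    # -> topological_ordering < 12 AND stream_ordering < 26

    # Streaming forwards ('f') from a stream token bounds with '>':
    print(token_bound("s26", ">"))
    # -> stream_ordering > 26
```

Running the sketch prints the bound fragments that `paginate_room_events` interpolates into its SQL for each token type, which is why `get_room_events` can dispatch on `is_stream_token` to choose between live streaming and topological pagination.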