diff options
Diffstat (limited to 'synapse/storage/stream.py')
-rw-r--r-- | synapse/storage/stream.py | 469 |
1 files changed, 226 insertions, 243 deletions
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 2956c3b3e0..fb463c525a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -38,15 +38,17 @@ from twisted.internet import defer from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore -from synapse.util.caches.descriptors import cached from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn -from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.util.logcontext import make_deferred_yieldable, run_in_background +from synapse.storage.engines import PostgresEngine import abc import logging +from six.moves import range +from collections import namedtuple + logger = logging.getLogger(__name__) @@ -58,6 +60,12 @@ _STREAM_TOKEN = "stream" _TOPOLOGICAL_TOKEN = "topological" +# Used as return values for pagination APIs +_EventDictReturn = namedtuple("_EventDictReturn", ( + "event_id", "topological_ordering", "stream_ordering", +)) + + def lower_bound(token, engine, inclusive=False): inclusive = "=" if inclusive else "" if token.topological is None: @@ -196,13 +204,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): results = {} room_ids = list(room_ids) - for rm_ids in (room_ids[i:i + 20] for i in xrange(0, len(room_ids), 20)): + for rm_ids in (room_ids[i:i + 20] for i in range(0, len(room_ids), 20)): res = yield make_deferred_yieldable(defer.gatherResults([ - preserve_fn(self.get_room_events_stream_for_room)( + run_in_background( + self.get_room_events_stream_for_room, room_id, from_key, to_key, limit, order=order, ) for room_id in rm_ids - ])) + ], consumeErrors=True)) results.update(dict(zip(rm_ids, res))) defer.returnValue(results) @@ -224,54 +233,55 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_room_events_stream_for_room(self, room_id, from_key, to_key, limit=0, order='DESC'): - # Note: If from_key is None then we return in topological order. This - # is because in that case we're using this as a "get the last few messages - # in a room" function, rather than "get new messages since last sync" - if from_key is not None: - from_id = RoomStreamToken.parse_stream_token(from_key).stream - else: - from_id = None - to_id = RoomStreamToken.parse_stream_token(to_key).stream + """Get new room events in stream ordering since `from_key`. + + Args: + room_id (str) + from_key (str): Token from which no events are returned before + to_key (str): Token from which no events are returned after. (This + is typically the current stream token) + limit (int): Maximum number of events to return + order (str): Either "DESC" or "ASC". Determines which events are + returned when the result is limited. If "DESC" then the most + recent `limit` events are returned, otherwise returns the + oldest `limit` events. + + Returns: + Deferred[tuple[list[FrozenEvent], str]]: Returns the list of + events (in ascending order) and the token from the start of + the chunk of events returned. + """ if from_key == to_key: defer.returnValue(([], from_key)) - if from_id: - has_changed = yield self._events_stream_cache.has_entity_changed( - room_id, from_id - ) - - if not has_changed: - defer.returnValue(([], from_key)) + from_id = RoomStreamToken.parse_stream_token(from_key).stream + to_id = RoomStreamToken.parse_stream_token(to_key).stream - def f(txn): - if from_id is not None: - sql = ( - "SELECT event_id, stream_ordering FROM events WHERE" - " room_id = ?" - " AND not outlier" - " AND stream_ordering > ? AND stream_ordering <= ?" - " ORDER BY stream_ordering %s LIMIT ?" - ) % (order,) - txn.execute(sql, (room_id, from_id, to_id, limit)) - else: - sql = ( - "SELECT event_id, stream_ordering FROM events WHERE" - " room_id = ?" - " AND not outlier" - " AND stream_ordering <= ?" - " ORDER BY topological_ordering %s, stream_ordering %s LIMIT ?" - ) % (order, order,) - txn.execute(sql, (room_id, to_id, limit)) + has_changed = yield self._events_stream_cache.has_entity_changed( + room_id, from_id + ) - rows = self.cursor_to_dict(txn) + if not has_changed: + defer.returnValue(([], from_key)) + def f(txn): + sql = ( + "SELECT event_id, stream_ordering FROM events WHERE" + " room_id = ?" + " AND not outlier" + " AND stream_ordering > ? AND stream_ordering <= ?" + " ORDER BY stream_ordering %s LIMIT ?" + ) % (order,) + txn.execute(sql, (room_id, from_id, to_id, limit)) + + rows = [_EventDictReturn(row[0], None, row[1]) for row in txn] return rows rows = yield self.runInteraction("get_room_events_stream_for_room", f) ret = yield self._get_events( - [r["event_id"] for r in rows], + [r.event_id for r in rows], get_prev_content=True ) @@ -281,7 +291,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ret.reverse() if rows: - key = "s%d" % min(r["stream_ordering"] for r in rows) + key = "s%d" % min(r.stream_ordering for r in rows) else: # Assume we didn't get anything because there was nothing to # get. @@ -291,10 +301,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_membership_changes_for_user(self, user_id, from_key, to_key): - if from_key is not None: - from_id = RoomStreamToken.parse_stream_token(from_key).stream - else: - from_id = None + from_id = RoomStreamToken.parse_stream_token(from_key).stream to_id = RoomStreamToken.parse_stream_token(to_key).stream if from_key == to_key: @@ -308,34 +315,24 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): defer.returnValue([]) def f(txn): - if from_id is not None: - sql = ( - "SELECT m.event_id, stream_ordering FROM events AS e," - " room_memberships AS m" - " WHERE e.event_id = m.event_id" - " AND m.user_id = ?" - " AND e.stream_ordering > ? AND e.stream_ordering <= ?" - " ORDER BY e.stream_ordering ASC" - ) - txn.execute(sql, (user_id, from_id, to_id,)) - else: - sql = ( - "SELECT m.event_id, stream_ordering FROM events AS e," - " room_memberships AS m" - " WHERE e.event_id = m.event_id" - " AND m.user_id = ?" - " AND stream_ordering <= ?" - " ORDER BY stream_ordering ASC" - ) - txn.execute(sql, (user_id, to_id,)) - rows = self.cursor_to_dict(txn) + sql = ( + "SELECT m.event_id, stream_ordering FROM events AS e," + " room_memberships AS m" + " WHERE e.event_id = m.event_id" + " AND m.user_id = ?" + " AND e.stream_ordering > ? AND e.stream_ordering <= ?" + " ORDER BY e.stream_ordering ASC" + ) + txn.execute(sql, (user_id, from_id, to_id,)) + + rows = [_EventDictReturn(row[0], None, row[1]) for row in txn] return rows rows = yield self.runInteraction("get_membership_changes_for_user", f) ret = yield self._get_events( - [r["event_id"] for r in rows], + [r.event_id for r in rows], get_prev_content=True ) @@ -344,14 +341,28 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): defer.returnValue(ret) @defer.inlineCallbacks - def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): + def get_recent_events_for_room(self, room_id, limit, end_token): + """Get the most recent events in the room in topological ordering. + + Args: + room_id (str) + limit (int) + end_token (str): The stream token representing now. + + Returns: + Deferred[tuple[list[FrozenEvent], str]]: Returns a list of + events and a token pointing to the start of the returned + events. + The events returned are in ascending order. + """ + rows, token = yield self.get_recent_event_ids_for_room( - room_id, limit, end_token, from_token + room_id, limit, end_token, ) logger.debug("stream before") events = yield self._get_events( - [r["event_id"] for r in rows], + [r.event_id for r in rows], get_prev_content=True ) logger.debug("stream after") @@ -360,60 +371,36 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): defer.returnValue((events, token)) - @cached(num_args=4) - def get_recent_event_ids_for_room(self, room_id, limit, end_token, from_token=None): - end_token = RoomStreamToken.parse_stream_token(end_token) - - if from_token is None: - sql = ( - "SELECT stream_ordering, topological_ordering, event_id" - " FROM events" - " WHERE room_id = ? AND stream_ordering <= ? AND outlier = ?" - " ORDER BY topological_ordering DESC, stream_ordering DESC" - " LIMIT ?" - ) - else: - from_token = RoomStreamToken.parse_stream_token(from_token) - sql = ( - "SELECT stream_ordering, topological_ordering, event_id" - " FROM events" - " WHERE room_id = ? AND stream_ordering > ?" - " AND stream_ordering <= ? AND outlier = ?" - " ORDER BY topological_ordering DESC, stream_ordering DESC" - " LIMIT ?" - ) - - def get_recent_events_for_room_txn(txn): - if from_token is None: - txn.execute(sql, (room_id, end_token.stream, False, limit,)) - else: - txn.execute(sql, ( - room_id, from_token.stream, end_token.stream, False, limit - )) + @defer.inlineCallbacks + def get_recent_event_ids_for_room(self, room_id, limit, end_token): + """Get the most recent events in the room in topological ordering. - rows = self.cursor_to_dict(txn) + Args: + room_id (str) + limit (int) + end_token (str): The stream token representing now. - rows.reverse() # As we selected with reverse ordering + Returns: + Deferred[tuple[list[_EventDictReturn], str]]: Returns a list of + _EventDictReturn and a token pointing to the start of the returned + events. + The events returned are in ascending order. + """ + # Allow a zero limit here, and no-op. + if limit == 0: + defer.returnValue(([], end_token)) - if rows: - # Tokens are positions between events. - # This token points *after* the last event in the chunk. - # We need it to point to the event before it in the chunk - # since we are going backwards so we subtract one from the - # stream part. - topo = rows[0]["topological_ordering"] - toke = rows[0]["stream_ordering"] - 1 - start_token = str(RoomStreamToken(topo, toke)) + end_token = RoomStreamToken.parse(end_token) - token = (start_token, str(end_token)) - else: - token = (str(end_token), str(end_token)) + rows, token = yield self.runInteraction( + "get_recent_event_ids_for_room", self._paginate_room_events_txn, + room_id, from_token=end_token, limit=limit, + ) - return rows, token + # We want to return the results in ascending order. + rows.reverse() - return self.runInteraction( - "get_recent_events_for_room", get_recent_events_for_room_txn - ) + defer.returnValue((rows, token)) def get_room_event_after_stream_ordering(self, room_id, stream_ordering): """Gets details of the first event in a room at or after a stream ordering @@ -517,10 +504,20 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @staticmethod def _set_before_and_after(events, rows, topo_order=True): + """Inserts ordering information to events' internal metadata from + the DB rows. + + Args: + events (list[FrozenEvent]) + rows (list[_EventDictReturn]) + topo_order (bool): Whether the events were ordered topologically + or by stream ordering. If true then all rows should have a non + null topological_ordering. + """ for event, row in zip(events, rows): - stream = row["stream_ordering"] - if topo_order: - topo = event.depth + stream = row.stream_ordering + if topo_order and row.topological_ordering: + topo = row.topological_ordering else: topo = None internal = event.internal_metadata @@ -592,87 +589,27 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): retcols=["stream_ordering", "topological_ordering"], ) - token = RoomStreamToken( - results["topological_ordering"], + # Paginating backwards includes the event at the token, but paginating + # forward doesn't. + before_token = RoomStreamToken( + results["topological_ordering"] - 1, results["stream_ordering"], ) - if isinstance(self.database_engine, Sqlite3Engine): - # SQLite3 doesn't optimise ``(x < a) OR (x = a AND y < b)`` - # So we give pass it to SQLite3 as the UNION ALL of the two queries. - - query_before = ( - "SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND topological_ordering < ?" - " UNION ALL" - " SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND topological_ordering = ? AND stream_ordering < ?" - " ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ?" - ) - before_args = ( - room_id, token.topological, - room_id, token.topological, token.stream, - before_limit, - ) - - query_after = ( - "SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND topological_ordering > ?" - " UNION ALL" - " SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND topological_ordering = ? AND stream_ordering > ?" - " ORDER BY topological_ordering ASC, stream_ordering ASC LIMIT ?" - ) - after_args = ( - room_id, token.topological, - room_id, token.topological, token.stream, - after_limit, - ) - else: - query_before = ( - "SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND %s" - " ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ?" - ) % (upper_bound(token, self.database_engine, inclusive=False),) - - before_args = (room_id, before_limit) - - query_after = ( - "SELECT topological_ordering, stream_ordering, event_id FROM events" - " WHERE room_id = ? AND %s" - " ORDER BY topological_ordering ASC, stream_ordering ASC LIMIT ?" - ) % (lower_bound(token, self.database_engine, inclusive=False),) - - after_args = (room_id, after_limit) - - txn.execute(query_before, before_args) - - rows = self.cursor_to_dict(txn) - events_before = [r["event_id"] for r in rows] - - if rows: - start_token = str(RoomStreamToken( - rows[0]["topological_ordering"], - rows[0]["stream_ordering"] - 1, - )) - else: - start_token = str(RoomStreamToken( - token.topological, - token.stream - 1, - )) - - txn.execute(query_after, after_args) + after_token = RoomStreamToken( + results["topological_ordering"], + results["stream_ordering"], + ) - rows = self.cursor_to_dict(txn) - events_after = [r["event_id"] for r in rows] + rows, start_token = self._paginate_room_events_txn( + txn, room_id, before_token, direction='b', limit=before_limit, + ) + events_before = [r.event_id for r in rows] - if rows: - end_token = str(RoomStreamToken( - rows[-1]["topological_ordering"], - rows[-1]["stream_ordering"], - )) - else: - end_token = str(token) + rows, end_token = self._paginate_room_events_txn( + txn, room_id, after_token, direction='f', limit=after_limit, + ) + events_after = [r.event_id for r in rows] return { "before": { @@ -735,17 +672,30 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): def has_room_changed_since(self, room_id, stream_id): return self._events_stream_cache.has_entity_changed(room_id, stream_id) + def _paginate_room_events_txn(self, txn, room_id, from_token, to_token=None, + direction='b', limit=-1, event_filter=None): + """Returns list of events before or after a given token. -class StreamStore(StreamWorkerStore): - def get_room_max_stream_ordering(self): - return self._stream_id_gen.get_current_token() + Args: + txn + room_id (str) + from_token (RoomStreamToken): The token used to stream from + to_token (RoomStreamToken|None): A token which if given limits the + results to only those before + direction(char): Either 'b' or 'f' to indicate whether we are + paginating forwards or backwards from `from_key`. + limit (int): The maximum number of events to return. + event_filter (Filter|None): If provided filters the events to + those that match the filter. - def get_room_min_stream_ordering(self): - return self._backfill_id_gen.get_current_token() + Returns: + Deferred[tuple[list[_EventDictReturn], str]]: Returns the results + as a list of _EventDictReturn and a token that points to the end + of the result set. + """ + + assert int(limit) >= 0 - @defer.inlineCallbacks - def paginate_room_events(self, room_id, from_key, to_key=None, - direction='b', limit=-1, event_filter=None): # Tokens really represent positions between elements, but we use # the convention of pointing to the event before the gap. Hence # we have a bit of asymmetry when it comes to equalities. @@ -753,20 +703,20 @@ class StreamStore(StreamWorkerStore): if direction == 'b': order = "DESC" bounds = upper_bound( - RoomStreamToken.parse(from_key), self.database_engine + from_token, self.database_engine ) - if to_key: + if to_token: bounds = "%s AND %s" % (bounds, lower_bound( - RoomStreamToken.parse(to_key), self.database_engine + to_token, self.database_engine )) else: order = "ASC" bounds = lower_bound( - RoomStreamToken.parse(from_key), self.database_engine + from_token, self.database_engine ) - if to_key: + if to_token: bounds = "%s AND %s" % (bounds, upper_bound( - RoomStreamToken.parse(to_key), self.database_engine + to_token, self.database_engine )) filter_clause, filter_args = filter_to_clause(event_filter) @@ -775,52 +725,85 @@ class StreamStore(StreamWorkerStore): bounds += " AND " + filter_clause args.extend(filter_args) - if int(limit) > 0: - args.append(int(limit)) - limit_str = " LIMIT ?" - else: - limit_str = "" + args.append(int(limit)) sql = ( - "SELECT * FROM events" + "SELECT event_id, topological_ordering, stream_ordering" + " FROM events" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s %(limit)s" + " stream_ordering %(order)s LIMIT ?" ) % { "bounds": bounds, "order": order, - "limit": limit_str } - def f(txn): - txn.execute(sql, args) - - rows = self.cursor_to_dict(txn) - - if rows: - topo = rows[-1]["topological_ordering"] - toke = rows[-1]["stream_ordering"] - if direction == 'b': - # Tokens are positions between events. - # This token points *after* the last event in the chunk. - # We need it to point to the event before it in the chunk - # when we are going backwards so we subtract one from the - # stream part. - toke -= 1 - next_token = str(RoomStreamToken(topo, toke)) - else: - # TODO (erikj): We should work out what to do here instead. - next_token = to_key if to_key else from_key + txn.execute(sql, args) + + rows = [_EventDictReturn(row[0], row[1], row[2]) for row in txn] + + if rows: + topo = rows[-1].topological_ordering + toke = rows[-1].stream_ordering + if direction == 'b': + # Tokens are positions between events. + # This token points *after* the last event in the chunk. + # We need it to point to the event before it in the chunk + # when we are going backwards so we subtract one from the + # stream part. + toke -= 1 + next_token = RoomStreamToken(topo, toke) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_token if to_token else from_token + + return rows, str(next_token), + + @defer.inlineCallbacks + def paginate_room_events(self, room_id, from_key, to_key=None, + direction='b', limit=-1, event_filter=None): + """Returns list of events before or after a given token. - return rows, next_token, + Args: + room_id (str) + from_key (str): The token used to stream from + to_key (str|None): A token which if given limits the results to + only those before + direction(char): Either 'b' or 'f' to indicate whether we are + paginating forwards or backwards from `from_key`. + limit (int): The maximum number of events to return. Zero or less + means no limit. + event_filter (Filter|None): If provided filters the events to + those that match the filter. - rows, token = yield self.runInteraction("paginate_room_events", f) + Returns: + tuple[list[dict], str]: Returns the results as a list of dicts and + a token that points to the end of the result set. The dicts have + the keys "event_id", "topological_ordering" and "stream_orderign". + """ + + from_key = RoomStreamToken.parse(from_key) + if to_key: + to_key = RoomStreamToken.parse(to_key) + + rows, token = yield self.runInteraction( + "paginate_room_events", self._paginate_room_events_txn, + room_id, from_key, to_key, direction, limit, event_filter, + ) events = yield self._get_events( - [r["event_id"] for r in rows], + [r.event_id for r in rows], get_prev_content=True ) self._set_before_and_after(events, rows) defer.returnValue((events, token)) + + +class StreamStore(StreamWorkerStore): + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() |