From a0603523d2e210cf59f887bd75e1a755720cb7a8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 May 2019 16:59:21 +0100 Subject: Add aggregations API --- synapse/storage/relations.py | 225 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 219 insertions(+), 6 deletions(-) (limited to 'synapse/storage/relations.py') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 31ef6679af..db4b842c97 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -18,7 +18,9 @@ import logging import attr from synapse.api.constants import RelationTypes +from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore +from synapse.storage.stream import generate_pagination_where_clause logger = logging.getLogger(__name__) @@ -29,19 +31,94 @@ class PaginationChunk(object): Attributes: chunk (list): The rows returned by pagination + next_batch (Any|None): Token to fetch next set of results with, if + None then there are no more results. + prev_batch (Any|None): Token to fetch previous set of results with, if + None then there are no previous results. """ chunk = attr.ib() + next_batch = attr.ib(default=None) + prev_batch = attr.ib(default=None) def to_dict(self): d = {"chunk": self.chunk} + if self.next_batch: + d["next_batch"] = self.next_batch.to_string() + + if self.prev_batch: + d["prev_batch"] = self.prev_batch.to_string() + return d +@attr.s +class RelationPaginationToken(object): + """Pagination token for relation pagination API. + + As the results are order by topological ordering, we can use the + `topological_ordering` and `stream_ordering` fields of the events at the + boundaries of the chunk as pagination tokens. + + Attributes: + topological (int): The topological ordering of the boundary event + stream (int): The stream ordering of the boundary event. + """ + + topological = attr.ib() + stream = attr.ib() + + @staticmethod + def from_string(string): + try: + t, s = string.split("-") + return RelationPaginationToken(int(t), int(s)) + except ValueError: + raise SynapseError(400, "Invalid token") + + def to_string(self): + return "%d-%d" % (self.topological, self.stream) + + +@attr.s +class AggregationPaginationToken(object): + """Pagination token for relation aggregation pagination API. + + As the results are order by count and then MAX(stream_ordering) of the + aggregation groups, we can just use them as our pagination token. + + Attributes: + count (int): The count of relations in the boundar group. + stream (int): The MAX stream ordering in the boundary group. + """ + + count = attr.ib() + stream = attr.ib() + + @staticmethod + def from_string(string): + try: + c, s = string.split("-") + return AggregationPaginationToken(int(c), int(s)) + except ValueError: + raise SynapseError(400, "Invalid token") + + def to_string(self): + return "%d-%d" % (self.count, self.stream) + + class RelationsStore(SQLBaseStore): def get_relations_for_event( - self, event_id, relation_type=None, event_type=None, limit=5, direction="b" + self, + event_id, + relation_type=None, + event_type=None, + aggregation_key=None, + limit=5, + direction="b", + from_token=None, + to_token=None, ): """Get a list of relations for an event, ordered by topological ordering. @@ -51,16 +128,26 @@ class RelationsStore(SQLBaseStore): type, if given. event_type (str|None): Only fetch events with this event type, if given. + aggregation_key (str|None): Only fetch events with this aggregation + key, if given. limit (int): Only fetch the most recent `limit` events. direction (str): Whether to fetch the most recent first (`"b"`) or the oldest first (`"f"`). + from_token (RelationPaginationToken|None): Fetch rows from the given + token, or from the start if None. + to_token (RelationPaginationToken|None): Fetch rows up to the given + token, or up to the end if None. Returns: Deferred[PaginationChunk]: List of event IDs that match relations requested. The rows are of the form `{"event_id": "..."}`. """ - # TODO: Pagination tokens + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) where_clause = ["relates_to_id = ?"] where_args = [event_id] @@ -73,12 +160,29 @@ class RelationsStore(SQLBaseStore): where_clause.append("type = ?") where_args.append(event_type) - order = "ASC" + if aggregation_key: + where_clause.append("aggregation_key = ?") + where_args.append(aggregation_key) + + pagination_clause = generate_pagination_where_clause( + direction=direction, + column_names=("topological_ordering", "stream_ordering"), + from_token=attr.astuple(from_token) if from_token else None, + to_token=attr.astuple(to_token) if to_token else None, + engine=self.database_engine, + ) + + if pagination_clause: + where_clause.append(pagination_clause) + if direction == "b": order = "DESC" + else: + order = "ASC" sql = """ - SELECT event_id FROM event_relations + SELECT event_id, topological_ordering, stream_ordering + FROM event_relations INNER JOIN events USING (event_id) WHERE %s ORDER BY topological_ordering %s, stream_ordering %s @@ -92,16 +196,125 @@ class RelationsStore(SQLBaseStore): def _get_recent_references_for_event_txn(txn): txn.execute(sql, where_args + [limit + 1]) - events = [{"event_id": row[0]} for row in txn] + last_topo_id = None + last_stream_id = None + events = [] + for row in txn: + events.append({"event_id": row[0]}) + last_topo_id = row[1] + last_stream_id = row[2] + + next_batch = None + if len(events) > limit and last_topo_id and last_stream_id: + next_batch = RelationPaginationToken(last_topo_id, last_stream_id) return PaginationChunk( - chunk=list(events[:limit]), + chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token ) return self.runInteraction( "get_recent_references_for_event", _get_recent_references_for_event_txn ) + def get_aggregation_groups_for_event( + self, + event_id, + event_type=None, + limit=5, + direction="b", + from_token=None, + to_token=None, + ): + """Get a list of annotations on the event, grouped by event type and + aggregation key, sorted by count. + + This is used e.g. to get the what and how many reactions have happend + on an event. + + Args: + event_id (str): Fetch events that relate to this event ID. + event_type (str|None): Only fetch events with this event type, if + given. + limit (int): Only fetch the `limit` groups. + direction (str): Whether to fetch the highest count first (`"b"`) or + the lowest count first (`"f"`). + from_token (AggregationPaginationToken|None): Fetch rows from the + given token, or from the start if None. + to_token (AggregationPaginationToken|None): Fetch rows up to the + given token, or up to the end if None. + + + Returns: + Deferred[PaginationChunk]: List of groups of annotations that + match. Each row is a dict with `type`, `key` and `count` fields. + """ + + if from_token: + from_token = AggregationPaginationToken.from_string(from_token) + + if to_token: + to_token = AggregationPaginationToken.from_string(to_token) + + where_clause = ["relates_to_id = ?", "relation_type = ?"] + where_args = [event_id, RelationTypes.ANNOTATION] + + if event_type: + where_clause.append("type = ?") + where_args.append(event_type) + + having_clause = generate_pagination_where_clause( + direction=direction, + column_names=("COUNT(*)", "MAX(stream_ordering)"), + from_token=attr.astuple(from_token) if from_token else None, + to_token=attr.astuple(to_token) if to_token else None, + engine=self.database_engine, + ) + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + if having_clause: + having_clause = "HAVING " + having_clause + else: + having_clause = "" + + sql = """ + SELECT type, aggregation_key, COUNT(*), MAX(stream_ordering) + FROM event_relations + INNER JOIN events USING (event_id) + WHERE {where_clause} + GROUP BY relation_type, type, aggregation_key + {having_clause} + ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order} + LIMIT ? + """.format( + where_clause=" AND ".join(where_clause), + order=order, + having_clause=having_clause, + ) + + def _get_aggregation_groups_for_event_txn(txn): + txn.execute(sql, where_args + [limit + 1]) + + next_batch = None + events = [] + for row in txn: + events.append({"type": row[0], "key": row[1], "count": row[2]}) + next_batch = AggregationPaginationToken(row[2], row[3]) + + if len(events) <= limit: + next_batch = None + + return PaginationChunk( + chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token + ) + + return self.runInteraction( + "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn + ) + def _handle_event_relations(self, txn, event): """Handles inserting relation data during peristence of events -- cgit 1.4.1 From 33453419b0cbbe29d3f4b376e9eafab10d5d012a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 09:56:12 +0100 Subject: Add cache to relations --- synapse/storage/relations.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse/storage/relations.py') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index db4b842c97..1fd3d4fafc 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -21,6 +21,7 @@ from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore from synapse.storage.stream import generate_pagination_where_clause +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) @@ -109,6 +110,7 @@ class AggregationPaginationToken(object): class RelationsStore(SQLBaseStore): + @cached(tree=True) def get_relations_for_event( self, event_id, @@ -216,6 +218,7 @@ class RelationsStore(SQLBaseStore): "get_recent_references_for_event", _get_recent_references_for_event_txn ) + @cached(tree=True) def get_aggregation_groups_for_event( self, event_id, @@ -353,3 +356,8 @@ class RelationsStore(SQLBaseStore): "aggregation_key": aggregation_key, }, ) + + txn.call_after(self.get_relations_for_event.invalidate_many, (parent_id,)) + txn.call_after( + self.get_aggregation_groups_for_event.invalidate_many, (parent_id,) + ) -- cgit 1.4.1 From b5c62c6b2643a138956af0b04521540c270a3e94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 10:18:53 +0100 Subject: Fix relations in worker mode --- synapse/replication/slave/storage/events.py | 13 ++++++++++--- synapse/replication/tcp/streams/_base.py | 1 + synapse/replication/tcp/streams/events.py | 11 ++++++----- synapse/storage/events.py | 12 ++++++++---- synapse/storage/relations.py | 4 +++- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'synapse/storage/relations.py') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index b457c5563f..797450bc66 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -23,6 +23,7 @@ from synapse.replication.tcp.streams.events import ( from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore +from synapse.storage.relations import RelationsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.signatures import SignatureWorkerStore from synapse.storage.state import StateGroupWorkerStore @@ -52,6 +53,7 @@ class SlavedEventStore(EventFederationWorkerStore, EventsWorkerStore, SignatureWorkerStore, UserErasureWorkerStore, + RelationsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): @@ -89,7 +91,7 @@ class SlavedEventStore(EventFederationWorkerStore, for row in rows: self.invalidate_caches_for_event( -token, row.event_id, row.room_id, row.type, row.state_key, - row.redacts, + row.redacts, row.relates_to, backfilled=True, ) return super(SlavedEventStore, self).process_replication_rows( @@ -102,7 +104,7 @@ class SlavedEventStore(EventFederationWorkerStore, if row.type == EventsStreamEventRow.TypeId: self.invalidate_caches_for_event( token, data.event_id, data.room_id, data.type, data.state_key, - data.redacts, + data.redacts, data.relates_to, backfilled=False, ) elif row.type == EventsStreamCurrentStateRow.TypeId: @@ -114,7 +116,8 @@ class SlavedEventStore(EventFederationWorkerStore, raise Exception("Unknown events stream row type %s" % (row.type, )) def invalidate_caches_for_event(self, stream_ordering, event_id, room_id, - etype, state_key, redacts, backfilled): + etype, state_key, redacts, relates_to, + backfilled): self._invalidate_get_event_cache(event_id) self.get_latest_event_ids_in_room.invalidate((room_id,)) @@ -136,3 +139,7 @@ class SlavedEventStore(EventFederationWorkerStore, state_key, stream_ordering ) self.get_invited_rooms_for_user.invalidate((state_key,)) + + if relates_to: + self.get_relations_for_event.invalidate_many((relates_to,)) + self.get_aggregation_groups_for_event.invalidate_many((relates_to,)) diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 8971a6a22e..b6ce7a7bee 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -32,6 +32,7 @@ BackfillStreamRow = namedtuple("BackfillStreamRow", ( "type", # str "state_key", # str, optional "redacts", # str, optional + "relates_to", # str, optional )) PresenceStreamRow = namedtuple("PresenceStreamRow", ( "user_id", # str diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index e0f6e29248..f1290d022a 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -80,11 +80,12 @@ class BaseEventsStreamRow(object): class EventsStreamEventRow(BaseEventsStreamRow): TypeId = "ev" - event_id = attr.ib() # str - room_id = attr.ib() # str - type = attr.ib() # str - state_key = attr.ib() # str, optional - redacts = attr.ib() # str, optional + event_id = attr.ib() # str + room_id = attr.ib() # str + type = attr.ib() # str + state_key = attr.ib() # str, optional + redacts = attr.ib() # str, optional + relates_to = attr.ib() # str, optional @attr.s(slots=True, frozen=True) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6802bf42ce..b025ebc926 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1657,10 +1657,11 @@ class EventsStore( def get_all_new_forward_event_rows(txn): sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? < stream_ordering AND stream_ordering <= ?" " ORDER BY stream_ordering ASC" " LIMIT ?" @@ -1675,11 +1676,12 @@ class EventsStore( sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " INNER JOIN ex_outlier_stream USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? < event_stream_ordering" " AND event_stream_ordering <= ?" " ORDER BY event_stream_ordering DESC" @@ -1700,10 +1702,11 @@ class EventsStore( def get_all_new_backfill_event_rows(txn): sql = ( "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? > stream_ordering AND stream_ordering >= ?" " ORDER BY stream_ordering ASC" " LIMIT ?" @@ -1718,11 +1721,12 @@ class EventsStore( sql = ( "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts" + " state_key, redacts, relates_to_id" " FROM events AS e" " INNER JOIN ex_outlier_stream USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" " WHERE ? > event_stream_ordering" " AND event_stream_ordering >= ?" " ORDER BY event_stream_ordering DESC" diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 1fd3d4fafc..732418ec65 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -109,7 +109,7 @@ class AggregationPaginationToken(object): return "%d-%d" % (self.count, self.stream) -class RelationsStore(SQLBaseStore): +class RelationsWorkerStore(SQLBaseStore): @cached(tree=True) def get_relations_for_event( self, @@ -318,6 +318,8 @@ class RelationsStore(SQLBaseStore): "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn ) + +class RelationsStore(RelationsWorkerStore): def _handle_event_relations(self, txn, event): """Handles inserting relation data during peristence of events -- cgit 1.4.1 From 2c662ddde4e1277a0dc17295748aa5f0c41fa163 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 14:21:39 +0100 Subject: Indirect tuple conversion --- synapse/storage/relations.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'synapse/storage/relations.py') diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 732418ec65..996cb6903a 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -81,6 +81,9 @@ class RelationPaginationToken(object): def to_string(self): return "%d-%d" % (self.topological, self.stream) + def as_tuple(self): + return attr.astuple(self) + @attr.s class AggregationPaginationToken(object): @@ -108,6 +111,9 @@ class AggregationPaginationToken(object): def to_string(self): return "%d-%d" % (self.count, self.stream) + def as_tuple(self): + return attr.astuple(self) + class RelationsWorkerStore(SQLBaseStore): @cached(tree=True) -- cgit 1.4.1 From 7a7eba8302d6566c044ca82c8ac0da65aa85e36b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2019 14:24:58 +0100 Subject: Move parsing of tokens out of storage layer --- synapse/rest/client/v2_alpha/relations.py | 19 +++++++++++++++++++ synapse/storage/relations.py | 16 ++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'synapse/storage/relations.py') diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 1b53e638eb..41e0a44936 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -32,6 +32,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.rest.client.transactions import HttpTransactionCache +from synapse.storage.relations import AggregationPaginationToken, RelationPaginationToken from ._base import client_v2_patterns @@ -149,6 +150,12 @@ class RelationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) + result = yield self.store.get_relations_for_event( event_id=parent_id, relation_type=relation_type, @@ -221,6 +228,12 @@ class RelationAggregationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = AggregationPaginationToken.from_string(from_token) + + if to_token: + to_token = AggregationPaginationToken.from_string(to_token) + res = yield self.store.get_aggregation_groups_for_event( event_id=parent_id, event_type=event_type, @@ -289,6 +302,12 @@ class RelationAggregationGroupPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") + if from_token: + from_token = RelationPaginationToken.from_string(from_token) + + if to_token: + to_token = RelationPaginationToken.from_string(to_token) + result = yield self.store.get_relations_for_event( event_id=parent_id, relation_type=relation_type, diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 996cb6903a..de67e305a1 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -54,7 +54,7 @@ class PaginationChunk(object): return d -@attr.s +@attr.s(frozen=True, slots=True) class RelationPaginationToken(object): """Pagination token for relation pagination API. @@ -85,7 +85,7 @@ class RelationPaginationToken(object): return attr.astuple(self) -@attr.s +@attr.s(frozen=True, slots=True) class AggregationPaginationToken(object): """Pagination token for relation aggregation pagination API. @@ -151,12 +151,6 @@ class RelationsWorkerStore(SQLBaseStore): requested. The rows are of the form `{"event_id": "..."}`. """ - if from_token: - from_token = RelationPaginationToken.from_string(from_token) - - if to_token: - to_token = RelationPaginationToken.from_string(to_token) - where_clause = ["relates_to_id = ?"] where_args = [event_id] @@ -258,12 +252,6 @@ class RelationsWorkerStore(SQLBaseStore): match. Each row is a dict with `type`, `key` and `count` fields. """ - if from_token: - from_token = AggregationPaginationToken.from_string(from_token) - - if to_token: - to_token = AggregationPaginationToken.from_string(to_token) - where_clause = ["relates_to_id = ?", "relation_type = ?"] where_args = [event_id, RelationTypes.ANNOTATION] -- cgit 1.4.1