diff options
author | Brendan Abolivier <babolivier@matrix.org> | 2019-11-01 17:04:45 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-01 17:04:45 +0000 |
commit | f496d2587723b0e802c49c210476266a19733f55 (patch) | |
tree | 9a2ea1817fdddec2f719e4ffe5d3150bf6f8599c /synapse | |
parent | Factor out an _AsyncEventContextImpl (#6298) (diff) | |
parent | Incorporate review (diff) | |
download | synapse-f496d2587723b0e802c49c210476266a19733f55.tar.xz |
Merge pull request #6301 from matrix-org/babolivier/msc2326
Implement MSC2326 (label based filtering)
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/api/constants.py | 7 | ||||
-rw-r--r-- | synapse/api/filtering.py | 15 | ||||
-rw-r--r-- | synapse/rest/client/versions.py | 3 | ||||
-rw-r--r-- | synapse/storage/data_stores/main/events.py | 36 | ||||
-rw-r--r-- | synapse/storage/data_stores/main/schema/delta/56/event_labels.sql | 30 | ||||
-rw-r--r-- | synapse/storage/data_stores/main/stream.py | 11 |
6 files changed, 98 insertions, 4 deletions
diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 312196675e..49c4b85054 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -138,3 +138,10 @@ class LimitBlockingTypes(object): MONTHLY_ACTIVE_USER = "monthly_active_user" HS_DISABLED = "hs_disabled" + + +class EventContentFields(object): + """Fields found in events' content, regardless of type.""" + + # Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326 + LABELS = "org.matrix.labels" diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 9f06556bd2..bec13f08d8 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -20,6 +20,7 @@ from jsonschema import FormatChecker from twisted.internet import defer +from synapse.api.constants import EventContentFields from synapse.api.errors import SynapseError from synapse.storage.presence import UserPresenceState from synapse.types import RoomID, UserID @@ -66,6 +67,10 @@ ROOM_EVENT_FILTER_SCHEMA = { "contains_url": {"type": "boolean"}, "lazy_load_members": {"type": "boolean"}, "include_redundant_members": {"type": "boolean"}, + # Include or exclude events with the provided labels. + # cf https://github.com/matrix-org/matrix-doc/pull/2326 + "org.matrix.labels": {"type": "array", "items": {"type": "string"}}, + "org.matrix.not_labels": {"type": "array", "items": {"type": "string"}}, }, } @@ -259,6 +264,9 @@ class Filter(object): self.contains_url = self.filter_json.get("contains_url", None) + self.labels = self.filter_json.get("org.matrix.labels", None) + self.not_labels = self.filter_json.get("org.matrix.not_labels", []) + def filters_all_types(self): return "*" in self.not_types @@ -282,6 +290,7 @@ class Filter(object): room_id = None ev_type = "m.presence" contains_url = False + labels = [] else: sender = event.get("sender", None) if not sender: @@ -300,10 +309,11 @@ class Filter(object): content = event.get("content", {}) # check if there is a string url field in the content for filtering purposes contains_url = isinstance(content.get("url"), text_type) + labels = content.get(EventContentFields.LABELS, []) - return self.check_fields(room_id, sender, ev_type, contains_url) + return self.check_fields(room_id, sender, ev_type, labels, contains_url) - def check_fields(self, room_id, sender, event_type, contains_url): + def check_fields(self, room_id, sender, event_type, labels, contains_url): """Checks whether the filter matches the given event fields. Returns: @@ -313,6 +323,7 @@ class Filter(object): "rooms": lambda v: room_id == v, "senders": lambda v: sender == v, "types": lambda v: _matches_wildcard(event_type, v), + "labels": lambda v: v in labels, } for name, match_func in literal_keys.items(): diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 1044ae7b4e..bb30ce3f34 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -65,6 +65,9 @@ class VersionsRestServlet(RestServlet): "m.require_identity_server": False, # as per MSC2290 "m.separate_add_and_bind": True, + # Implements support for label-based filtering as described in + # MSC2326. + "org.matrix.label_based_filtering": True, }, }, ) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index aafc2007d3..301f8ea128 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -29,7 +29,7 @@ from prometheus_client import Counter from twisted.internet import defer import synapse.metrics -from synapse.api.constants import EventTypes +from synapse.api.constants import EventContentFields, EventTypes from synapse.api.errors import SynapseError from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 @@ -932,6 +932,13 @@ class EventsStore( self._handle_event_relations(txn, event) + # Store the labels for this event. + labels = event.content.get(EventContentFields.LABELS) + if labels: + self.insert_labels_for_event_txn( + txn, event.event_id, labels, event.room_id, event.depth + ) + # Insert into the room_memberships table. self._store_room_members_txn( txn, @@ -1917,6 +1924,33 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) + def insert_labels_for_event_txn( + self, txn, event_id, labels, room_id, topological_ordering + ): + """Store the mapping between an event's ID and its labels, with one row per + (event_id, label) tuple. + + Args: + txn (LoggingTransaction): The transaction to execute. + event_id (str): The event's ID. + labels (list[str]): A list of text labels. + room_id (str): The ID of the room the event was sent to. + topological_ordering (int): The position of the event in the room's topology. + """ + return self._simple_insert_many_txn( + txn=txn, + table="event_labels", + values=[ + { + "event_id": event_id, + "label": label, + "room_id": room_id, + "topological_ordering": topological_ordering, + } + for label in labels + ], + ) + AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/data_stores/main/schema/delta/56/event_labels.sql b/synapse/storage/data_stores/main/schema/delta/56/event_labels.sql new file mode 100644 index 0000000000..5e29c1da19 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/56/event_labels.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- room_id and topoligical_ordering are denormalised from the events table in order to +-- make the index work. +CREATE TABLE IF NOT EXISTS event_labels ( + event_id TEXT, + label TEXT, + room_id TEXT NOT NULL, + topological_ordering BIGINT NOT NULL, + PRIMARY KEY(event_id, label) +); + + +-- This index enables an event pagination looking for a particular label to index the +-- event_labels table first, which is much quicker than scanning the events table and then +-- filtering by label, if the label is rarely used relative to the size of the room. +CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering); diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 263999dfca..616ef91d4e 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -229,6 +229,14 @@ def filter_to_clause(event_filter): clauses.append("contains_url = ?") args.append(event_filter.contains_url) + # We're only applying the "labels" filter on the database query, because applying the + # "not_labels" filter via a SQL query is non-trivial. Instead, we let + # event_filter.check_fields apply it, which is not as efficient but makes the + # implementation simpler. + if event_filter.labels: + clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels)) + args.extend(event_filter.labels) + return " AND ".join(clauses), args @@ -864,8 +872,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args.append(int(limit)) sql = ( - "SELECT event_id, topological_ordering, stream_ordering" + "SELECT DISTINCT event_id, topological_ordering, stream_ordering" " FROM events" + " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s LIMIT ?" |