From c85c9125627a62c73711786723be12be30d7a81e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Oct 2015 15:48:31 +0100 Subject: Add basic full text search impl. --- synapse/storage/__init__.py | 2 + synapse/storage/_base.py | 2 +- synapse/storage/schema/delta/24/fts.py | 57 ++++++++++++++++++++++++++ synapse/storage/search.py | 75 ++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/search.py (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 340e59afcb..5f91ef77c0 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -40,6 +40,7 @@ from .filtering import FilteringStore from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore +from .search import SearchStore import fnmatch @@ -79,6 +80,7 @@ class DataStore(RoomMemberStore, RoomStore, EventsStore, ReceiptsStore, EndToEndKeyStore, + SearchStore, ): def __init__(self, hs): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 693784ad38..218e708054 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -519,7 +519,7 @@ class SQLBaseStore(object): allow_none=False, desc="_simple_select_one_onecol"): """Executes a SELECT query on the named table, which is expected to - return a single row, returning a single column from it." + return a single row, returning a single column from it. Args: table : string giving the table name diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py new file mode 100644 index 0000000000..5680332758 --- /dev/null +++ b/synapse/storage/schema/delta/24/fts.py @@ -0,0 +1,57 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage import get_statements +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_idx ON event_search USING gin(vector); +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + # We only support FTS for postgres currently. + return + + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) diff --git a/synapse/storage/search.py b/synapse/storage/search.py new file mode 100644 index 0000000000..eea4477765 --- /dev/null +++ b/synapse/storage/search.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from _base import SQLBaseStore +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes + + +class SearchStore(SQLBaseStore): + @defer.inlineCallbacks + def search_msgs(self, constraints): + clauses = [] + args = [] + fts = None + + for c in constraints: + local_clauses = [] + if c.search_type == SearchConstraintTypes.FTS: + fts = c.value + for key in c.keys: + local_clauses.append("key = ?") + args.append(key) + elif c.search_type == SearchConstraintTypes.EXACT: + for key in c.keys: + if key == KnownRoomEventKeys.ROOM_ID: + for value in c.value: + local_clauses.append("room_id = ?") + args.append(value) + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) + + sql = ( + "SELECT ts_rank_cd(vector, query) AS rank, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " WHERE vector @@ query" + ) + + for clause in clauses: + sql += " AND " + clause + + sql += " ORDER BY rank DESC" + + results = yield self._execute( + "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + defer.returnValue([ + { + "rank": r["rank"], + "result": event_map[r["event_id"]] + } + for r in results + if r["event_id"] in event_map + ]) -- cgit 1.4.1 From 61561b9df791ec90e287e535cc75831c2016bf36 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:49:53 +0100 Subject: Keep FTS indexes up to date. Only search through rooms currently joined --- synapse/handlers/search.py | 31 ++++++++++++++++++++++--------- synapse/rest/client/v1/room.py | 2 +- synapse/storage/events.py | 2 ++ synapse/storage/room.py | 22 ++++++++++++++++++++++ synapse/storage/schema/delta/24/fts.py | 3 ++- synapse/storage/search.py | 7 ++++++- 6 files changed, 55 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8b997fc394..b6bdb752e9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -65,7 +65,7 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, content): + def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] @@ -76,20 +76,33 @@ class SearchHandler(BaseHandler): raise SynapseError(400, "Only one constraint can be FTS") fts = True - res = yield self.hs.get_datastore().search_msgs(constraints) + rooms = yield self.store.get_rooms_for_user( + user.to_string(), + ) - time_now = self.hs.get_clock().time_msec() + # For some reason the list of events contains duplicates + # TODO(paul): work out why because I really don't think it should + room_ids = set(r.room_id for r in rooms) - results = [ - { + res = yield self.store.search_msgs(room_ids, constraints) + + time_now = self.clock.time_msec() + + results = { + r["result"].event_id: { "rank": r["rank"], "result": serialize_event(r["result"], time_now) } for r in res - ] + } logger.info("returning: %r", results) - results.sort(key=lambda r: -r["rank"]) - - defer.returnValue(results) + defer.returnValue({ + "search_categories": { + "room_events": { + "results": results, + "count": len(results) + } + } + }) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 35bd702a43..94adabca62 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -540,7 +540,7 @@ class SearchRestServlet(ClientV1RestServlet): content = _parse_json(request) - results = yield self.handlers.search_handler.search(content) + results = yield self.handlers.search_handler.search(auth_user, content) defer.returnValue((200, results)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 416ef6af93..e6c1abfc27 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -307,6 +307,8 @@ class EventsStore(SQLBaseStore): self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: self._store_room_topic_txn(txn, event) + elif event.type == EventTypes.Message: + self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5e07b7e0e5..e4e830944a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -175,6 +175,10 @@ class RoomStore(SQLBaseStore): }, ) + self._store_event_search_txn( + txn, event, "content.topic", event.content["topic"] + ) + def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: self._simple_insert_txn( @@ -187,6 +191,24 @@ class RoomStore(SQLBaseStore): } ) + self._store_event_search_txn( + txn, event, "content.name", event.content["name"] + ) + + def _store_room_message_txn(self, txn, event): + if hasattr(event, "content") and "body" in event.content: + self._store_event_search_txn( + txn, event, "content.body", event.content["body"] + ) + + def _store_event_search_txn(self, txn, event, key, value): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 5680332758..05f1605fdd 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -44,7 +44,8 @@ INSERT INTO event_search SELECT FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; -CREATE INDEX event_search_idx ON event_search USING gin(vector); +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index eea4477765..e66b5f9edc 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -21,11 +21,16 @@ from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, constraints): + def search_msgs(self, room_ids, constraints): clauses = [] args = [] fts = None + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) + for c in constraints: local_clauses = [] if c.search_type == SearchConstraintTypes.FTS: -- cgit 1.4.1 From 927004e34905d4ad6a69576ee1799fe8019d8985 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:06:14 +0100 Subject: Remove unused room_id parameter --- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 10 ++++----- synapse/handlers/search.py | 50 +++++++++++++++++++++++++++++++++++++++++- synapse/handlers/sync.py | 2 +- synapse/storage/state.py | 11 +++++----- 5 files changed, 61 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3882ba79ed..a710bdcfdb 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -242,7 +242,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): event_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, None), diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 30949ff7a6..d2f0892f7a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -164,7 +164,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), @@ -290,7 +290,7 @@ class MessageHandler(BaseHandler): elif member_event.membership == Membership.LEAVE: key = (event_type, state_key) room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], [key] + [member_event.event_id], [key] ) data = room_state[member_event.event_id].get(key) @@ -314,7 +314,7 @@ class MessageHandler(BaseHandler): room_state = yield self.state_handler.get_current_state(room_id) elif member_event.membership == Membership.LEAVE: room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] @@ -403,7 +403,7 @@ class MessageHandler(BaseHandler): elif event.membership == Membership.LEAVE: room_end_token = "s%d" % (event.stream_ordering,) deferred_room_state = self.store.get_state_for_events( - event.room_id, [event.event_id], None + [event.event_id], None ) deferred_room_state.addCallback( lambda states: states[event.event_id] @@ -496,7 +496,7 @@ class MessageHandler(BaseHandler): def _room_initial_sync_parted(self, user_id, room_id, pagin_config, member_event): room_state = yield self.store.get_state_for_events( - member_event.room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 9dc474aa56..71182a8fe0 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,7 +17,9 @@ from twisted.internet import defer from ._base import BaseHandler -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.constants import ( + EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes +) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -71,6 +73,52 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def _filter_events_for_client(self, user_id, room_id, events): + event_id_to_state = yield self.store.get_state_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) + ) + + def allowed(event, state): + if event.type == EventTypes.RoomHistoryVisibility: + return True + + membership_ev = state.get((EventTypes.Member, user_id), None) + if membership_ev: + membership = membership_ev.membership + else: + membership = Membership.LEAVE + + if membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + else: + visibility = "shared" + + if visibility == "public": + return True + elif visibility == "shared": + return True + elif visibility == "joined": + return membership == Membership.JOIN + elif visibility == "invited": + return membership == Membership.INVITE + + return True + + defer.returnValue([ + event + for event in events + if allowed(event, event_id_to_state[event.event_id]) + ]) + @defer.inlineCallbacks def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9914ff6f9c..a8940de166 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -312,7 +312,7 @@ class SyncHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e935b9443b..acfb322a53 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -54,7 +54,7 @@ class StateStore(SQLBaseStore): defer.returnValue({}) event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -208,13 +208,12 @@ class StateStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_state_for_events(self, room_id, event_ids, types): + def get_state_for_events(self, event_ids, types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: - room_id (str) event_ids (list) types (list): List of (type, state_key) tuples which are used to filter the state fetched. `state_key` may be None, which matches @@ -225,7 +224,7 @@ class StateStore(SQLBaseStore): The dicts are mappings from (type, state_key) -> state_events """ event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -251,8 +250,8 @@ class StateStore(SQLBaseStore): ) @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", - num_args=2) - def _get_state_group_for_events(self, room_id, event_ids): + num_args=1) + def _get_state_group_for_events(self, event_ids): """Returns mapping event_id -> state_group """ def f(txn): -- cgit 1.4.1 From ca53ad74250d94b8c9b6581e6cedef0a29520fc2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:52:55 +0100 Subject: Filter events to only thsoe that the user is allowed to see --- synapse/handlers/search.py | 16 ++++++++++------ synapse/storage/search.py | 14 +++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 71182a8fe0..49b786dadb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -74,7 +74,7 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, room_id, events): + def _filter_events_for_client(self, user_id, events): event_id_to_state = yield self.store.get_state_for_events( frozenset(e.event_id for e in events), types=( @@ -139,16 +139,20 @@ class SearchHandler(BaseHandler): # TODO(paul): work out why because I really don't think it should room_ids = set(r.room_id for r in rooms) - res = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + + allowed_events = yield self._filter_events_for_client( + user.to_string(), event_map.values() + ) time_now = self.clock.time_msec() results = { - r["result"].event_id: { - "rank": r["rank"], - "result": serialize_event(r["result"], time_now) + e.event_id: { + "rank": rank_map[e.event_id], + "result": serialize_event(e, time_now) } - for r in res + for e in allowed_events } logger.info("returning: %r", results) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e66b5f9edc..238df38440 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -70,11 +70,11 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue([ + defer.returnValue(( { - "rank": r["rank"], - "result": event_map[r["event_id"]] - } - for r in results - if r["event_id"] in event_map - ]) + r["event_id"]: r["rank"] + for r in results + if r["event_id"] in event_map + }, + event_map + )) -- cgit 1.4.1 From 1a40afa75693f0c2ae3b2eaac62ff9ca6bb02488 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 10:36:25 +0100 Subject: Add sqlite schema --- synapse/storage/schema/delta/24/fts.py | 69 +++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 05f1605fdd..a806f4b8d3 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -15,7 +15,9 @@ import logging from synapse.storage import get_statements -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson logger = logging.getLogger(__name__) @@ -46,13 +48,70 @@ INSERT INTO event_search SELECT CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); """ +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" +) +SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" + + def run_upgrade(cur, database_engine, *args, **kwargs): - if not isinstance(database_engine, PostgresEngine): - # We only support FTS for postgres currently. + if isinstance(database_engine, PostgresEngine): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) return - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + if isinstance(database_engine, Sqlite3Engine): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" + " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) + + # cur.execute(SQLITE_INDEX) -- cgit 1.4.1 From cfd39d6b55fad5b176f1883e1bc87ed8e14acf42 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 13:47:50 +0100 Subject: Add SQLite support --- synapse/storage/search.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 238df38440..5843f80876 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -17,6 +17,7 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @@ -48,11 +49,17 @@ class SearchStore(SQLBaseStore): "(%s)" % (" OR ".join(local_clauses),) ) - sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, event_id" - " FROM plainto_tsquery('english', ?) as query, event_search" - " WHERE vector @@ query" - ) + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "SELECT ts_rank_cd(vector, query) AS rank, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " WHERE vector @@ query" + ) + else: + sql = ( + "SELECT 0 as rank, event_id FROM event_search" + " WHERE value MATCH ?" + ) for clause in clauses: sql += " AND " + clause -- cgit 1.4.1 From 3e2a1297b513dc1fadb288c74684f6651a88016d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 15:22:14 +0100 Subject: Remove constraints in preperation of using filters --- synapse/handlers/search.py | 61 ++++++++-------------------------------------- synapse/storage/search.py | 30 ++++++++--------------- 2 files changed, 20 insertions(+), 71 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index d5c395061c..8864a921fc 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,7 +18,7 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.api.constants import ( - EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes + EventTypes, Membership, ) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -29,45 +29,6 @@ import logging logger = logging.getLogger(__name__) -KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { - KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.CONTENT_NAME: [ - SearchConstraintTypes.FTS, - SearchConstraintTypes.EXACT, - SearchConstraintTypes.SUBSTRING, - ], - KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], - KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], -} - - -class RoomConstraint(object): - def __init__(self, search_type, keys, value): - self.search_type = search_type - self.keys = keys - self.value = value - - @classmethod - def from_dict(cls, d): - search_type = d["type"] - keys = d["keys"] - - for key in keys: - if key not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES: - raise SynapseError(400, "Unrecognized key %r", key) - - if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: - raise SynapseError( - 400, - "Disallowed constraint type %r for key %r", search_type, key - ) - - return cls(search_type, keys, d["value"]) - - class SearchHandler(BaseHandler): def __init__(self, hs): @@ -121,22 +82,20 @@ class SearchHandler(BaseHandler): @defer.inlineCallbacks def search(self, user, content): - constraint_dicts = content["search_categories"]["room_events"]["constraints"] - constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] - - fts = False - for c in constraints: - if c.search_type == SearchConstraintTypes.FTS: - if fts: - raise SynapseError(400, "Only one constraint can be FTS") - fts = True + try: + search_term = content["search_categories"]["room_events"]["search_term"] + keys = content["search_categories"]["room_events"]["keys"] + except KeyError: + raise SynapseError(400, "Invalid search query") rooms = yield self.store.get_rooms_for_user_where_membership_is( - user.to_string(), membership_list=[Membership.JOIN, Membership.LEAVE], + user.to_string(), + membership_list=[Membership.JOIN], + # membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban], ) room_ids = set(r.room_id for r in rooms) - rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) allowed_events = yield self._filter_events_for_client( user.to_string(), event_map.values() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 5843f80876..7a30ce25eb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,38 +16,28 @@ from twisted.internet import defer from _base import SQLBaseStore -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, room_ids, constraints): + def search_msgs(self, room_ids, search_term, keys): clauses = [] args = [] - fts = None clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) args.extend(room_ids) - for c in constraints: - local_clauses = [] - if c.search_type == SearchConstraintTypes.FTS: - fts = c.value - for key in c.keys: - local_clauses.append("key = ?") - args.append(key) - elif c.search_type == SearchConstraintTypes.EXACT: - for key in c.keys: - if key == KnownRoomEventKeys.ROOM_ID: - for value in c.value: - local_clauses.append("room_id = ?") - args.append(value) - clauses.append( - "(%s)" % (" OR ".join(local_clauses),) - ) + local_clauses = [] + for key in keys: + local_clauses.append("key = ?") + args.append(key) + + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) if isinstance(self.database_engine, PostgresEngine): sql = ( @@ -67,7 +57,7 @@ class SearchStore(SQLBaseStore): sql += " ORDER BY rank DESC" results = yield self._execute( - "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) events = yield self._get_events([r["event_id"] for r in results]) -- cgit 1.4.1 From 7ecd11accb68cc0f20e7ab84673df38413ba7cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 15:50:56 +0100 Subject: Add paranoia limit --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7a30ce25eb..1b987161e2 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -54,7 +54,7 @@ class SearchStore(SQLBaseStore): for clause in clauses: sql += " AND " + clause - sql += " ORDER BY rank DESC" + sql += " ORDER BY rank DESC LIMIT 500" results = yield self._execute( "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) -- cgit 1.4.1 From 99c7fbfef7729e6f3cceb9cea64f21d5a2c5b41f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:52:40 +0100 Subject: Fix to work with SQLite --- synapse/storage/room.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index e4e830944a..0527cee05d 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -19,6 +19,7 @@ from synapse.api.errors import StoreError from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks +from .engines import PostgresEngine import collections import logging @@ -202,10 +203,16 @@ class RoomStore(SQLBaseStore): ) def _store_event_search_txn(self, txn, event, key, value): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + else: + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) txn.execute(sql, (event.event_id, event.room_id, key, value,)) -- cgit 1.4.1 From 22a8c91448f710c20a6aee66ec2a452528f1d637 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:19:44 +0100 Subject: Split up run_upgrade --- synapse/storage/schema/delta/24/fts.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index b45a5fd820..0c752d8426 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -55,16 +55,24 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" ) -SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + run_postgres_upgrade(cur) return if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): cur.execute(SQLITE_TABLE) rowid = -1 @@ -113,5 +121,3 @@ def run_upgrade(cur, database_engine, *args, **kwargs): " VALUES (?,?,?,?)", rows ) - - # cur.execute(SQLITE_INDEX) -- cgit 1.4.1 From 73260ad01f067495e541a936eef4a14ba2fea5ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:24:02 +0100 Subject: Comment on the LIMIT 500 --- synapse/storage/search.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 1b987161e2..7d642e18ff 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -54,6 +54,8 @@ class SearchStore(SQLBaseStore): for clause in clauses: sql += " AND " + clause + # We add an arbitrary limit here to ensure we don't try to pull the + # entire table from the database. sql += " ORDER BY rank DESC LIMIT 500" results = yield self._execute( -- cgit 1.4.1 From 3cf9948b8d5956c05026ee734ccf65d203eb6d6b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:28:12 +0100 Subject: Add docstring --- synapse/storage/search.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7d642e18ff..6c10f9631e 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -22,6 +22,17 @@ from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): + """Performs a full text search over events with give keys. + + Args: + room_ids (list): List of room ids to search in + search_term (str): Search term to search for + keys (list): List of keys to search in, currently supports + "content.body", "content.name", "content.body" + + Returns: + 2-tuple of (dict event_id -> rank, dict event_id -> event) + """ clauses = [] args = [] -- cgit 1.4.1 From edb998ba23cf74de624963f61ca9c897260a3e7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 14:37:14 +0100 Subject: Explicitly check for Sqlite3Engine --- synapse/storage/search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 6c10f9631e..dd012fa565 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,7 +16,7 @@ from twisted.internet import defer from _base import SQLBaseStore -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine class SearchStore(SQLBaseStore): @@ -56,11 +56,14 @@ class SearchStore(SQLBaseStore): " FROM plainto_tsquery('english', ?) as query, event_search" " WHERE vector @@ query" ) - else: + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "SELECT 0 as rank, event_id FROM event_search" " WHERE value MATCH ?" ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") for clause in clauses: sql += " AND " + clause -- cgit 1.4.1 From f2d698cb52883d8d43faabefdc70e2ade9ebb8b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 16:46:48 +0100 Subject: Typing --- synapse/storage/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dd012fa565..a3c69c5ab3 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -22,13 +22,13 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): - """Performs a full text search over events with give keys. + """Performs a full text search over events with given keys. Args: room_ids (list): List of room ids to search in search_term (str): Search term to search for keys (list): List of keys to search in, currently supports - "content.body", "content.name", "content.body" + "content.body", "content.name", "content.topic" Returns: 2-tuple of (dict event_id -> rank, dict event_id -> event) -- cgit 1.4.1 From 46d39343d976a933c3f2dfd19e5e552c01c93bf4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 16:58:00 +0100 Subject: Explicitly check for Sqlite3Engine --- synapse/storage/room.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 0527cee05d..13441fcdce 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -19,7 +19,7 @@ from synapse.api.errors import StoreError from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks -from .engines import PostgresEngine +from .engines import PostgresEngine, Sqlite3Engine import collections import logging @@ -208,11 +208,14 @@ class RoomStore(SQLBaseStore): "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" ) - else: + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") txn.execute(sql, (event.event_id, event.room_id, key, value,)) -- cgit 1.4.1 From 68b7fc3e2ba0aae7813b0bae52370860b5cd9f26 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 19 Oct 2015 17:26:18 +0100 Subject: Add rooms that the user has left under archived in v2 sync. --- synapse/handlers/sync.py | 128 ++++++++++++++++++++++++++++++++++- synapse/rest/client/v2_alpha/sync.py | 29 ++++++-- synapse/storage/roommember.py | 13 ++++ 3 files changed, 161 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ee6b881de1..1891cd088c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -61,18 +61,37 @@ class JoinedSyncResult(collections.namedtuple("JoinedSyncResult", [ return bool(self.timeline or self.state or self.ephemeral) +class ArchivedSyncResult(collections.namedtuple("JoinedSyncResult", [ + "room_id", + "timeline", + "state", +])): + __slots__ = [] + + def __nonzero__(self): + """Make the result appear empty if there are no updates. This is used + to tell if room needs to be part of the sync result. + """ + return bool(self.timeline or self.state) + + class InvitedSyncResult(collections.namedtuple("InvitedSyncResult", [ "room_id", "invite", ])): __slots__ = [] + def __nonzero__(self): + """Invited rooms should always be reported to the client""" + return True + class SyncResult(collections.namedtuple("SyncResult", [ "next_batch", # Token for the next sync "presence", # List of presence events for the user. "joined", # JoinedSyncResult for each joined room. "invited", # InvitedSyncResult for each invited room. + "archived", # ArchivedSyncResult for each archived room. ])): __slots__ = [] @@ -156,11 +175,14 @@ class SyncHandler(BaseHandler): ) room_list = yield self.store.get_rooms_for_user_where_membership_is( user_id=sync_config.user.to_string(), - membership_list=[Membership.INVITE, Membership.JOIN] + membership_list=[ + Membership.INVITE, Membership.JOIN, Membership.LEAVE + ] ) joined = [] invited = [] + archived = [] for event in room_list: if event.membership == Membership.JOIN: room_sync = yield self.initial_sync_for_joined_room( @@ -173,11 +195,23 @@ class SyncHandler(BaseHandler): room_id=event.room_id, invite=invite, )) + elif event.membership == Membership.LEAVE: + leave_token = now_token.copy_and_replace( + "room_key", "s%d" % (event.stream_ordering,) + ) + room_sync = yield self.initial_sync_for_archived_room( + sync_config=sync_config, + room_id=event.room_id, + leave_event_id=event.event_id, + leave_token=leave_token, + ) + archived.append(room_sync) defer.returnValue(SyncResult( presence=presence, joined=joined, invited=invited, + archived=archived, next_batch=now_token, )) @@ -204,6 +238,28 @@ class SyncHandler(BaseHandler): ephemeral=[], )) + @defer.inlineCallbacks + def initial_sync_for_archived_room(self, room_id, sync_config, + leave_event_id, leave_token): + """Sync a room for a client which is starting without any state + Returns: + A Deferred JoinedSyncResult. + """ + + batch = yield self.load_filtered_recents( + room_id, sync_config, leave_token, + ) + + leave_state = yield self.store.get_state_for_events( + [leave_event_id], None + ) + + defer.returnValue(ArchivedSyncResult( + room_id=room_id, + timeline=batch, + state=leave_state[leave_event_id].values(), + )) + @defer.inlineCallbacks def incremental_sync_with_gap(self, sync_config, since_token): """ Get the incremental delta needed to bring the client up to @@ -257,18 +313,22 @@ class SyncHandler(BaseHandler): ) joined = [] + archived = [] if len(room_events) <= timeline_limit: # There is no gap in any of the rooms. Therefore we can just # partition the new events by room and return them. invite_events = [] + leave_events = [] events_by_room_id = {} for event in room_events: events_by_room_id.setdefault(event.room_id, []).append(event) if event.room_id not in joined_room_ids: if (event.type == EventTypes.Member - and event.membership == Membership.INVITE and event.state_key == sync_config.user.to_string()): - invite_events.append(event) + if event.membership == Membership.INVITE: + invite_events.append(event) + elif event.membership == Membership.LEAVE: + leave_events.append(event) for room_id in joined_room_ids: recents = events_by_room_id.get(room_id, []) @@ -296,11 +356,16 @@ class SyncHandler(BaseHandler): ) if room_sync: joined.append(room_sync) + else: invite_events = yield self.store.get_invites_for_user( sync_config.user.to_string() ) + leave_events = yield self.store.get_leave_events_for_user( + sync_config.user.to_string() + ) + for room_id in joined_room_ids: room_sync = yield self.incremental_sync_with_gap_for_room( room_id, sync_config, since_token, now_token, @@ -309,6 +374,12 @@ class SyncHandler(BaseHandler): if room_sync: joined.append(room_sync) + for leave_event in leave_events: + room_sync = yield self.incremental_sync_for_archived_room( + sync_config, leave_event, since_token + ) + archived.append(room_sync) + invited = [ InvitedSyncResult(room_id=event.room_id, invite=event) for event in invite_events @@ -318,6 +389,7 @@ class SyncHandler(BaseHandler): presence=presence, joined=joined, invited=invited, + archived=archived, next_batch=now_token, )) @@ -416,6 +488,56 @@ class SyncHandler(BaseHandler): defer.returnValue(room_sync) + @defer.inlineCallbacks + def incremental_sync_for_archived_room(self, sync_config, leave_event, + since_token): + """ Get the incremental delta needed to bring the client up to date for + the archived room. + Returns: + A Deferred ArchivedSyncResult + """ + + stream_token = yield self.store.get_stream_token_for_event( + leave_event.event_id + ) + + leave_token = since_token.copy_and_replace("room_key", stream_token) + + batch = yield self.load_filtered_recents( + leave_event.room_id, sync_config, leave_token, since_token, + ) + + logging.debug("Recents %r", batch) + + # TODO(mjark): This seems racy since this isn't being passed a + # token to indicate what point in the stream this is + leave_state = yield self.store.get_state_for_events( + [leave_event.event_id], None + ) + + state_events_at_leave = leave_state[leave_event.event_id].values() + + state_at_previous_sync = yield self.get_state_at_previous_sync( + leave_event.room_id, since_token=since_token + ) + + state_events_delta = yield self.compute_state_delta( + since_token=since_token, + previous_state=state_at_previous_sync, + current_state=state_events_at_leave, + ) + + room_sync = ArchivedSyncResult( + room_id=leave_event.room_id, + timeline=batch, + state=state_events_delta, + ) + + logging.debug("Room sync: %r", room_sync) + + defer.returnValue(room_sync) + + @defer.inlineCallbacks def get_state_at_previous_sync(self, room_id, since_token): """ Get the room state at the previous sync the client made. diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index fffecb24f5..73473a7e6b 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -136,6 +136,10 @@ class SyncRestServlet(RestServlet): sync_result.invited, filter, time_now, token_id ) + archived = self.encode_archived( + sync_result.archived, filter, time_now, token_id + ) + response_content = { "presence": self.encode_presence( sync_result.presence, filter, time_now @@ -143,7 +147,7 @@ class SyncRestServlet(RestServlet): "rooms": { "joined": joined, "invited": invited, - "archived": {}, + "archived": archived, }, "next_batch": sync_result.next_batch.to_string(), } @@ -182,14 +186,20 @@ class SyncRestServlet(RestServlet): return invited + def encode_archived(self, rooms, filter, time_now, token_id): + joined = {} + for room in rooms: + joined[room.room_id] = self.encode_room( + room, filter, time_now, token_id, joined=False + ) + + return joined + @staticmethod - def encode_room(room, filter, time_now, token_id): + def encode_room(room, filter, time_now, token_id, joined=True): event_map = {} state_events = filter.filter_room_state(room.state) - timeline_events = filter.filter_room_timeline(room.timeline.events) - ephemeral_events = filter.filter_room_ephemeral(room.ephemeral) state_event_ids = [] - timeline_event_ids = [] for event in state_events: # TODO(mjark): Respect formatting requirements in the filter. event_map[event.event_id] = serialize_event( @@ -198,6 +208,8 @@ class SyncRestServlet(RestServlet): ) state_event_ids.append(event.event_id) + timeline_events = filter.filter_room_timeline(room.timeline.events) + timeline_event_ids = [] for event in timeline_events: # TODO(mjark): Respect formatting requirements in the filter. event_map[event.event_id] = serialize_event( @@ -205,6 +217,7 @@ class SyncRestServlet(RestServlet): event_format=format_event_for_client_v2_without_event_id, ) timeline_event_ids.append(event.event_id) + result = { "event_map": event_map, "timeline": { @@ -213,8 +226,12 @@ class SyncRestServlet(RestServlet): "limited": room.timeline.limited, }, "state": {"events": state_event_ids}, - "ephemeral": {"events": ephemeral_events}, } + + if joined: + ephemeral_events = filter.filter_room_ephemeral(room.ephemeral) + result["ephemeral"] = {"events": ephemeral_events} + return result diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index dd98dcfda8..623400fd36 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -124,6 +124,19 @@ class RoomMemberStore(SQLBaseStore): invites.event_id for invite in invites ])) + def get_leave_events_for_user(self, user_id): + """ Get all the leave events for a user + Args: + user_id (str): The user ID. + Returns: + A deferred list of event objects. + """ + return self.get_rooms_for_user_where_membership_is( + user_id, [Membership.LEAVE] + ).addCallback(lambda leaves: self._get_events([ + leave.event_id for leave in leaves + ])) + def get_rooms_for_user_where_membership_is(self, user_id, membership_list): """ Get all the rooms for this user where the membership for this user matches one in the membership list. -- cgit 1.4.1 From e3d75f564ab1d17eb4ee47314f78c41553a486f1 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 21 Oct 2015 11:15:48 +0100 Subject: Include banned rooms in the archived section of v2 sync --- synapse/handlers/sync.py | 15 +++++++++------ synapse/storage/roommember.py | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5ca2606443..6cb756e476 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -175,9 +175,12 @@ class SyncHandler(BaseHandler): ) room_list = yield self.store.get_rooms_for_user_where_membership_is( user_id=sync_config.user.to_string(), - membership_list=[ - Membership.INVITE, Membership.JOIN, Membership.LEAVE - ] + membership_list=( + Membership.INVITE, + Membership.JOIN, + Membership.LEAVE, + Membership.BAN + ) ) joined = [] @@ -195,7 +198,7 @@ class SyncHandler(BaseHandler): room_id=event.room_id, invite=invite, )) - elif event.membership == Membership.LEAVE: + elif event.membership in (Membership.LEAVE, Membership.BAN): leave_token = now_token.copy_and_replace( "room_key", "s%d" % (event.stream_ordering,) ) @@ -327,7 +330,7 @@ class SyncHandler(BaseHandler): and event.state_key == sync_config.user.to_string()): if event.membership == Membership.INVITE: invite_events.append(event) - elif event.membership == Membership.LEAVE: + elif event.membership in (Membership.LEAVE, Membership.BAN): leave_events.append(event) for room_id in joined_room_ids: @@ -362,7 +365,7 @@ class SyncHandler(BaseHandler): sync_config.user.to_string() ) - leave_events = yield self.store.get_leave_events_for_user( + leave_events = yield self.store.get_leave_and_ban_events_for_user( sync_config.user.to_string() ) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 623400fd36..ae1ad56d9a 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -124,7 +124,7 @@ class RoomMemberStore(SQLBaseStore): invites.event_id for invite in invites ])) - def get_leave_events_for_user(self, user_id): + def get_leave_and_ban_events_for_user(self, user_id): """ Get all the leave events for a user Args: user_id (str): The user ID. @@ -132,7 +132,7 @@ class RoomMemberStore(SQLBaseStore): A deferred list of event objects. """ return self.get_rooms_for_user_where_membership_is( - user_id, [Membership.LEAVE] + user_id, (Membership.LEAVE, Membership.BAN) ).addCallback(lambda leaves: self._get_events([ leave.event_id for leave in leaves ])) -- cgit 1.4.1 From 4d25bc6c92c3d219786892c5d510e0c4c4eb8b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:12:28 +0100 Subject: Move FTS to delta 25 --- synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/24/fts.py | 123 --------------------------------- synapse/storage/schema/delta/25/fts.py | 123 +++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 124 deletions(-) delete mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/schema/delta/25/fts.py (limited to 'synapse/storage') diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1ddf55be4d..1a74d6e360 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 24 +SCHEMA_VERSION = 25 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py deleted file mode 100644 index 0c752d8426..0000000000 --- a/synapse/storage/schema/delta/24/fts.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging - -from synapse.storage.prepare_database import get_statements -from synapse.storage.engines import PostgresEngine, Sqlite3Engine - -import ujson - -logger = logging.getLogger(__name__) - - -POSTGRES_SQL = """ -CREATE TABLE event_search ( - event_id TEXT, - room_id TEXT, - key TEXT, - vector tsvector -); - -INSERT INTO event_search SELECT - event_id, room_id, 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - -CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); -CREATE INDEX event_search_ev_idx ON event_search(event_id); -CREATE INDEX event_search_ev_ridx ON event_search(room_id); -""" - - -SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" -) - - -def run_upgrade(cur, database_engine, *args, **kwargs): - if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) - return - - if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return - - -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) - - -def run_sqlite_upgrade(cur): - cur.execute(SQLITE_TABLE) - - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE rowid > ?" - " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - if ev["type"] == "m.room.message": - rows.append(( - ev["event_id"], ev["room_id"], "content.body", - ev["content"]["body"] - )) - if ev["type"] == "m.room.name": - rows.append(( - ev["event_id"], ev["room_id"], "content.name", - ev["content"]["name"] - )) - if ev["type"] == "m.room.topic": - rows.append(( - ev["event_id"], ev["room_id"], "content.topic", - ev["content"]["topic"] - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)", - rows - ) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py new file mode 100644 index 0000000000..fd181fc630 --- /dev/null +++ b/synapse/storage/schema/delta/25/fts.py @@ -0,0 +1,123 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage.prepare_database import get_statements +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE IF NOT EXISTS event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); +""" + + +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" +) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + run_postgres_upgrade(cur) + return + + if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" + " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) -- cgit 1.4.1 From f142898f529f89c5bd90710db2d0771894b0b33f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:18:01 +0100 Subject: PEP8 --- synapse/storage/schema/delta/25/fts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index fd181fc630..ed3cc06557 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -53,7 +53,8 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" + " USING fts3 ( event_id, room_id, key, value)" ) -- cgit 1.4.1 From ba02bba88c6895bc9196d226a2bbc8047b93e28b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 13:25:22 +0100 Subject: Limit max number of SQL vars --- synapse/storage/search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index a3c69c5ab3..810b5406ad 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -36,10 +36,12 @@ class SearchStore(SQLBaseStore): clauses = [] args = [] - clauses.append( - "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) - ) - args.extend(room_ids) + # Make sure we don't explode because the person is in too many rooms. + if len(room_ids) > 500: + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) local_clauses = [] for key in keys: -- cgit 1.4.1 From 232beb3a3c28ccdc5388daa9396d5054b7768b12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 15:02:35 +0100 Subject: Use namedtuple as return value --- synapse/handlers/search.py | 4 +++- synapse/storage/search.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index f53e5d35ac..bdc79ffc55 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -66,7 +66,9 @@ class SearchHandler(BaseHandler): room_ids = filtr.filter_rooms(room_ids) - rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) filtered_events = filtr.filter(event_map.values()) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 810b5406ad..41451ade57 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,6 +18,17 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from collections import namedtuple + +"""The result of a search. + +Fields: + rank_map (dict): Mapping event_id -> rank + event_map (dict): Mapping event_id -> event + pagination_token (str): Pagination token +""" +SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) + class SearchStore(SQLBaseStore): @defer.inlineCallbacks @@ -31,7 +42,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - 2-tuple of (dict event_id -> rank, dict event_id -> event) + SearchResult """ clauses = [] args = [] @@ -85,11 +96,12 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(( + defer.returnValue(SearchResult( { r["event_id"]: r["rank"] for r in results if r["event_id"] in event_map }, - event_map + event_map, + None )) -- cgit 1.4.1 From fb0fecd0b9f430670ca6de1f4746601dd6cc24f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:18:35 +0100 Subject: LESS THAN --- synapse/storage/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 41451ade57..e658e07dc7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,8 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - if len(room_ids) > 500: + # We filter the results regardless. + if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) -- cgit 1.4.1 From 671ac699f1d4672bed3817b3cafb7498df99c030 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:54:56 +0100 Subject: Actually filter results --- synapse/storage/search.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e658e07dc7..9608b5d6a7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,7 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - # We filter the results regardless. + # We filter the results below regardless. if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) @@ -66,13 +66,13 @@ class SearchStore(SQLBaseStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, event_id" + "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" " FROM plainto_tsquery('english', ?) as query, event_search" " WHERE vector @@ query" ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT 0 as rank, event_id FROM event_search" + "SELECT 0 as rank, room_id, event_id FROM event_search" " WHERE value MATCH ?" ) else: @@ -90,6 +90,8 @@ class SearchStore(SQLBaseStore): "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) + results = filter(lambda row: row["room_id"] in room_ids, results) + events = yield self._get_events([r["event_id"] for r in results]) event_map = { -- cgit 1.4.1 From 0c36098c1fe561629651e446589a644fed9188e4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Oct 2015 13:23:48 +0100 Subject: Implement rank function for SQLite FTS --- synapse/storage/engines/sqlite3.py | 27 +++++++++++++++++++++++++++ synapse/storage/schema/delta/25/fts.py | 2 +- synapse/storage/search.py | 3 ++- 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index bad3b5c5ac..a5a54ec011 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -17,6 +17,8 @@ from synapse.storage.prepare_database import ( prepare_database, prepare_sqlite3_database ) +import struct + class Sqlite3Engine(object): single_threaded = True @@ -32,6 +34,7 @@ class Sqlite3Engine(object): def on_new_connection(self, db_conn): self.prepare_database(db_conn) + db_conn.create_function("rank", 1, _rank) def prepare_database(self, db_conn): prepare_sqlite3_database(db_conn) @@ -45,3 +48,27 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + + +# Following functions taken from: https://github.com/coleifer/peewee + +def _parse_match_info(buf): + bufsize = len(buf) + return [struct.unpack('@I', buf[i:i+4])[0] for i in range(0, bufsize, 4)] + + +def _rank(raw_match_info): + """Handle match_info called w/default args 'pcx' - based on the example rank + function http://sqlite.org/fts3.html#appendix_a + """ + match_info = _parse_match_info(raw_match_info) + score = 0.0 + p, c = match_info[:2] + for phrase_num in range(p): + phrase_info_idx = 2 + (phrase_num * c * 3) + for col_num in range(c): + col_idx = phrase_info_idx + (col_num * 3) + x1, x2 = match_info[col_idx:col_idx + 2] + if x1 > 0: + score += float(x1) / x2 + return score diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index ed3cc06557..3f0b02d119 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -54,7 +54,7 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" - " USING fts3 ( event_id, room_id, key, value)" + " USING fts4 ( event_id, room_id, key, value )" ) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 9608b5d6a7..cdf003502f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -72,7 +72,8 @@ class SearchStore(SQLBaseStore): ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT 0 as rank, room_id, event_id FROM event_search" + "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" + " FROM event_search" " WHERE value MATCH ?" ) else: -- cgit 1.4.1 From 4cf633d5e9628a56cf9b400ee3e073fcdcb365f0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Oct 2015 15:41:36 +0100 Subject: Pull out sender when computing search results --- synapse/storage/schema/delta/25/fts.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index 3f0b02d119..056487af30 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -26,22 +26,23 @@ POSTGRES_SQL = """ CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, + sender TEXT, key TEXT, vector tsvector ); INSERT INTO event_search SELECT - event_id, room_id, 'content.body', + event_id, room_id, json::json->>'sender', 'content.body', to_tsvector('english', json::json->'content'->>'body') FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; INSERT INTO event_search SELECT - event_id, room_id, 'content.name', + event_id, room_id, json::json->>'sender', 'content.name', to_tsvector('english', json::json->'content'->>'name') FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; INSERT INTO event_search SELECT - event_id, room_id, 'content.topic', + event_id, room_id, json::json->>'sender', 'content.topic', to_tsvector('english', json::json->'content'->>'topic') FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; @@ -99,26 +100,28 @@ def run_sqlite_upgrade(cur): rows = [] for ev in events: - if ev["type"] == "m.room.message": + content = ev.get("content", {}) + body = content.get("body", None) + name = content.get("name", None) + topic = content.get("topic", None) + sender = ev.get("sender", None) + if ev["type"] == "m.room.message" and body: rows.append(( - ev["event_id"], ev["room_id"], "content.body", - ev["content"]["body"] + ev["event_id"], ev["room_id"], sender, "content.body", body )) - if ev["type"] == "m.room.name": + if ev["type"] == "m.room.name" and name: rows.append(( - ev["event_id"], ev["room_id"], "content.name", - ev["content"]["name"] + ev["event_id"], ev["room_id"], sender, "content.name", name )) - if ev["type"] == "m.room.topic": + if ev["type"] == "m.room.topic" and topic: rows.append(( - ev["event_id"], ev["room_id"], "content.topic", - ev["content"]["topic"] + ev["event_id"], ev["room_id"], sender, "content.topic", topic )) if rows: logger.info(rows) cur.executemany( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)", + "INSERT INTO event_search (event_id, room_id, sender, key, value)" + " VALUES (?,?,?,?,?)", rows ) -- cgit 1.4.1 From 5cb298c934adf9c7f42d06ea1ac74ab2bc651c23 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Oct 2015 13:45:56 +0000 Subject: Add room context api --- synapse/handlers/__init__.py | 3 +- synapse/handlers/room.py | 42 ++++++++++++++++ synapse/rest/client/v1/room.py | 36 +++++++++++++ synapse/storage/stream.py | 111 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 190 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 87b4d381c7..6a2339f2eb 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,7 @@ from synapse.appservice.scheduler import AppServiceScheduler from synapse.appservice.api import ApplicationServiceApi from .register import RegistrationHandler from .room import ( - RoomCreationHandler, RoomMemberHandler, RoomListHandler + RoomCreationHandler, RoomMemberHandler, RoomListHandler, RoomContextHandler, ) from .message import MessageHandler from .events import EventStreamHandler, EventHandler @@ -70,3 +70,4 @@ class Handlers(object): self.auth_handler = AuthHandler(hs) self.identity_handler = IdentityHandler(hs) self.search_handler = SearchHandler(hs) + self.room_context_handler = RoomContextHandler(hs) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 60f9fa58b0..dd93a5d04d 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -33,6 +33,7 @@ from collections import OrderedDict from unpaddedbase64 import decode_base64 import logging +import math import string logger = logging.getLogger(__name__) @@ -747,6 +748,47 @@ class RoomListHandler(BaseHandler): defer.returnValue({"start": "START", "end": "END", "chunk": chunk}) +class RoomContextHandler(BaseHandler): + @defer.inlineCallbacks + def get_event_context(self, user, room_id, event_id, limit): + before_limit = math.floor(limit/2.) + after_limit = limit - before_limit + + now_token = yield self.hs.get_event_sources().get_current_token() + + results = yield self.store.get_events_around( + room_id, event_id, before_limit, after_limit + ) + + results["events_before"] = yield self._filter_events_for_client( + user.to_string(), results["events_before"] + ) + + results["events_after"] = yield self._filter_events_for_client( + user.to_string(), results["events_after"] + ) + + if results["events_after"]: + last_event_id = results["events_after"][-1].event_id + else: + last_event_id = event_id + + state = yield self.store.get_state_for_events( + [last_event_id], None + ) + results["state"] = state[last_event_id].values() + + results["start"] = now_token.copy_and_replace( + "room_key", results["start"] + ).to_string() + + results["end"] = now_token.copy_and_replace( + "room_key", results["end"] + ).to_string() + + defer.returnValue(results) + + class RoomEventSource(object): def __init__(self, hs): self.store = hs.get_datastore() diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 4cee1c1599..2dcaee86cd 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -397,6 +397,41 @@ class RoomTriggerBackfill(ClientV1RestServlet): defer.returnValue((200, res)) +class RoomEventContext(ClientV1RestServlet): + PATTERN = client_path_pattern( + "/rooms/(?P[^/]*)/context/(?P[^/]*)$" + ) + + def __init__(self, hs): + super(RoomEventContext, self).__init__(hs) + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, event_id): + user, _ = yield self.auth.get_user_by_req(request) + + limit = int(request.args.get("limit", [10])[0]) + + results = yield self.handlers.room_context_handler.get_event_context( + user, room_id, event_id, limit, + ) + + time_now = self.clock.time_msec() + results["events_before"] = [ + serialize_event(event, time_now) for event in results["events_before"] + ] + results["events_after"] = [ + serialize_event(event, time_now) for event in results["events_after"] + ] + results["state"] = [ + serialize_event(event, time_now) for event in results["state"] + ] + + logger.info("Responding with %r", results) + + defer.returnValue((200, results)) + + # TODO: Needs unit testing class RoomMembershipRestServlet(ClientV1RestServlet): @@ -628,3 +663,4 @@ def register_servlets(hs, http_server): RoomRedactEventRestServlet(hs).register(http_server) RoomTypingRestServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) + RoomEventContext(hs).register(http_server) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 3cab06fdef..f2eecf52f9 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -23,7 +23,7 @@ paginate bacwards. This is implemented by keeping two ordering columns: stream_ordering and topological_ordering. Stream ordering is basically insertion/received order -(except for events from backfill requests). The topolgical_ordering is a +(except for events from backfill requests). The topological_ordering is a weak ordering of events based on the pdu graph. This means that we have to have two different types of tokens, depending on @@ -436,3 +436,112 @@ class StreamStore(SQLBaseStore): internal = event.internal_metadata internal.before = str(RoomStreamToken(topo, stream - 1)) internal.after = str(RoomStreamToken(topo, stream)) + + @defer.inlineCallbacks + def get_events_around(self, room_id, event_id, before_limit, after_limit): + results = yield self.runInteraction( + "get_events_around", self._get_events_around_txn, + room_id, event_id, before_limit, after_limit + ) + + events_before = yield self._get_events( + [e for e in results["before"]["event_ids"]], + get_prev_content=True + ) + + events_after = yield self._get_events( + [e for e in results["after"]["event_ids"]], + get_prev_content=True + ) + + defer.returnValue({ + "events_before": events_before, + "events_after": events_after, + "start": results["before"]["token"], + "end": results["after"]["token"], + }) + + def _get_events_around_txn(self, txn, room_id, event_id, before_limit, after_limit): + results = self._simple_select_one_txn( + txn, + "events", + keyvalues={ + "event_id": event_id, + "room_id": room_id, + }, + retcols=["stream_ordering", "topological_ordering"], + ) + + stream_ordering = results["stream_ordering"] + topological_ordering = results["topological_ordering"] + + query_before = ( + "SELECT topological_ordering, stream_ordering, event_id FROM events" + " WHERE room_id = ? AND (topological_ordering < ?" + " OR (topological_ordering = ? AND stream_ordering < ?))" + " ORDER BY topological_ordering DESC, stream_ordering DESC" + " LIMIT ?" + ) + + query_after = ( + "SELECT topological_ordering, stream_ordering, event_id FROM events" + " WHERE room_id = ? AND (topological_ordering > ?" + " OR (topological_ordering = ? AND stream_ordering > ?))" + " ORDER BY topological_ordering ASC, stream_ordering ASC" + " LIMIT ?" + ) + + txn.execute( + query_before, + ( + room_id, topological_ordering, topological_ordering, + stream_ordering, before_limit, + ) + ) + + rows = self.cursor_to_dict(txn) + events_before = [r["event_id"] for r in rows] + + if rows: + start_token = str(RoomStreamToken( + rows[0]["topological_ordering"], + rows[0]["stream_ordering"] - 1, + )) + else: + start_token = str(RoomStreamToken( + topological_ordering, + stream_ordering - 1, + )) + + txn.execute( + query_after, + ( + room_id, topological_ordering, topological_ordering, + stream_ordering, after_limit, + ) + ) + + rows = self.cursor_to_dict(txn) + events_after = [r["event_id"] for r in rows] + + if rows: + end_token = str(RoomStreamToken( + rows[-1]["topological_ordering"], + rows[-1]["stream_ordering"], + )) + else: + end_token = str(RoomStreamToken( + topological_ordering, + stream_ordering, + )) + + return { + "before": { + "event_ids": events_before, + "token": start_token, + }, + "after": { + "event_ids": events_after, + "token": end_token, + }, + } -- cgit 1.4.1 From 56dbcd1524d855e0bea2a77e371b1732f82a9492 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Oct 2015 14:05:50 +0000 Subject: Docs --- synapse/handlers/room.py | 13 +++++++++++++ synapse/storage/stream.py | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index dd93a5d04d..36878a6c20 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -751,6 +751,19 @@ class RoomListHandler(BaseHandler): class RoomContextHandler(BaseHandler): @defer.inlineCallbacks def get_event_context(self, user, room_id, event_id, limit): + """Retrieves events, pagination tokens and state around a given event + in a room. + + Args: + user (UserID) + room_id (str) + event_id (str) + limit (int): The maximum number of events to return in total + (excluding state). + + Returns: + dict + """ before_limit = math.floor(limit/2.) after_limit = limit - before_limit diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index f2eecf52f9..15d4c2bf68 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -439,6 +439,19 @@ class StreamStore(SQLBaseStore): @defer.inlineCallbacks def get_events_around(self, room_id, event_id, before_limit, after_limit): + """Retrieve events and pagination tokens around a given event in a + room. + + Args: + room_id (str) + event_id (str) + before_limit (int) + after_limit (int) + + Returns: + dict + """ + results = yield self.runInteraction( "get_events_around", self._get_events_around_txn, room_id, event_id, before_limit, after_limit @@ -462,6 +475,19 @@ class StreamStore(SQLBaseStore): }) def _get_events_around_txn(self, txn, room_id, event_id, before_limit, after_limit): + """Retrieves event_ids and pagination tokens around a given event in a + room. + + Args: + room_id (str) + event_id (str) + before_limit (int) + after_limit (int) + + Returns: + dict + """ + results = self._simple_select_one_txn( txn, "events", -- cgit 1.4.1 From 621e84d9a0f06f65bd51171079fd18eb916ab81a Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Fri, 30 Oct 2015 16:25:53 +0000 Subject: Add missing column --- synapse/storage/schema/delta/25/fts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index 056487af30..b7cd0ce3b8 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -55,7 +55,7 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" - " USING fts4 ( event_id, room_id, key, value )" + " USING fts4 ( event_id, room_id, sender, key, value )" ) -- cgit 1.4.1