From 892e70ec8404865b4b1e6cf4eef7a3ada7114149 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 28 Oct 2015 16:06:57 +0000 Subject: Add APIs for adding and removing tags from rooms --- synapse/storage/__init__.py | 2 + synapse/storage/schema/delta/25/tags.sql | 37 ++++++ synapse/storage/tags.py | 190 +++++++++++++++++++++++++++++++ 3 files changed, 229 insertions(+) create mode 100644 synapse/storage/schema/delta/25/tags.sql create mode 100644 synapse/storage/tags.py (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index a1bd9c4ce9..e7443f2838 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -41,6 +41,7 @@ from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore from .search import SearchStore +from .tags import TagsStore import logging @@ -71,6 +72,7 @@ class DataStore(RoomMemberStore, RoomStore, ReceiptsStore, EndToEndKeyStore, SearchStore, + TagsStore, ): def __init__(self, hs): diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/schema/delta/25/tags.sql new file mode 100644 index 0000000000..168766dcf3 --- /dev/null +++ b/synapse/storage/schema/delta/25/tags.sql @@ -0,0 +1,37 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS room_tags( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + tag TEXT NOT NULL, -- The name of the tag. + CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) +); + +CREATE TABLE IF NOT EXISTS room_tags_revisions ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + stream_id BIGINT NOT NULL, -- The current version of the room tags. + CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) +); + +CREATE TABLE IF NOT EXISTS private_user_data_max_stream_id( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO private_user_data_max_stream_id (stream_id) VALUES (0); diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py new file mode 100644 index 0000000000..507e60596f --- /dev/null +++ b/synapse/storage/tags.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
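# A note on the schema above, for orientation: private_user_data_max_stream_id
# uses the single-row table trick. The Lock CHAR(1) column defaults to 'X',
# is UNIQUE, and is constrained by CHECK (Lock='X'), so the table can never
# hold more than one row; the global stream position is therefore advanced
# with a plain UPDATE and read with an unqualified SELECT. room_tags_revisions
# then records, per user and room, the stream id at which that room's tags
# last changed, which is what lets clients ask what changed since a token.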
+ +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached +from twisted.internet import defer +from .util.id_generators import StreamIdGenerator + +import logging + +logger = logging.getLogger(__name__) + + +class TagsStore(SQLBaseStore): + def __init__(self, hs): + super(TagsStore, self).__init__(hs) + + self._private_user_data_id_gen = StreamIdGenerator( + "private_user_data_max_stream_id", "stream_id" + ) + + @cached() + def get_tags_for_user(self, user_id): + """Get all the tags for a user. + + + Args: + user_id(str): The user to get the tags for. + Returns: + A deferred dict mapping from room_id strings to lists of tag + strings. + """ + + deferred = self._simple_select_list( + "room_tags", {"user_id": user_id}, ["room_id", "tag"] + ) + + @deferred.addCallback + def tags_by_room(rows): + tags_by_room = {} + for row in rows: + tags_by_room.setdefault(row["room_id"], []).append(row["tag"]) + return tags_by_room + + return deferred + + @defer.inlineCallbacks + def get_updated_tags(self, user_id, stream_id): + """Get all the tags for the rooms where the tags have changed since the + given version + + Args: + user_id(str): The user to get the tags for. + stream_id(int): The earliest update to get for the user. + Returns: + A deferred dict mapping from room_id strings to lists of tag + strings for all the rooms that changed since the stream_id token. + """ + def get_updated_tags_txn(txn): + sql = ( + "SELECT room_id from room_tags_revisions" + " WHERE user_id = ? AND stream_id > ?" + ) + txn.execute(sql, (user_id, stream_id)) + room_ids = [row[0] for row in txn.fetchall()] + return room_ids + + room_ids = yield self.runInteraction( + "get_updated_tags", get_updated_tags_txn + ) + + results = {} + if room_ids: + tags_by_room = yield self.get_tags_for_user(self, user_id) + for room_id in rooms_ids: + results[room_id] = tags_by_room[room_id] + + defer.returnValue(results) + + def get_tags_for_room(self, user_id, room_id): + """Get all the tags for the given room + Args: + user_id(str): The user to get tags for + room_id(str): The room to get tags for + Returns: + A deferred list of string tags. + """ + return self._simple_select_onecol( + table="room_tags", + keyvalues={"user_id": user_id, "room_id": room_id}, + retcol="tag", + desc="get_tags_for_room", + ) + + @defer.inlineCallbacks + def add_tag_to_room(self, user_id, room_id, tag): + """Add a tag to a room for a user. + Returns: + A deferred that completes once the tag has been added. + """ + def add_tag_txn(txn, next_id): + sql = ( + "INSERT INTO room_tags (user_id, room_id, tag)" + " VALUES (?, ?, ?)" + ) + try: + txn.execute(sql, (user_id, room_id, tag)) + except database_engine.module.IntegrityError as e: + # Return early if the row is already in the table + # and we don't need to bump the revision number of the + # private_user_data. + return + self._update_revision_txn(txn, user_id, room_id, next_id) + + with (yield self._private_user_data_id_gen.get_next(self)) as next_id: + yield self.runInteraction("add_tag", add_tag_txn, next_id) + + self.get_tags_for_user.invalidate((user_id,)) + + @defer.inlineCallbacks + def remove_tag_from_room(self, user_id, room_id, tag): + """Remove a tag from a room for a user. + Returns: + A deffered that completes once the tag has been removed + """ + def remove_tag_txn(txn, next_id): + sql = ( + "DELETE FROM room_tags " + " WHERE user_id = ? AND room_id = ? AND tag = ?" 
+ ) + txn.execute(sql, (user_id, room_id, tag)) + self._update_revision_txn(txn, user_id, room_id, next_id) + + with (yield self._private_user_data_id_gen.get_next(self)) as next_id: + yield self.runInteraction("remove_tag", remove_tag_txn, next_id) + + self.get_tags_for_user.invalidate((user_id,)) + + def _update_revision_txn(self, txn, user_id, room_id, next_id): + """Update the latest revision of the tags for the given user and room. + + Args: + txn: The database cursor + user_id(str): The ID of the user. + room_id(str): The ID of the room. + next_id(int): The the revision to advance to. + """ + + update_max_id_sql = ( + "UPDATE private_user_data_max_stream_id" + " SET stream_id = ?" + " WHERE stream_id < ?" + ) + txn.execute(update_max_id_sql, (next_id, next_id)) + + update_sql = ( + "UPDATE room_tags_revisions" + " SET stream_id = ?" + " WHERE user_id = ?" + " AND room_id = ?" + ) + txn.execute(update_sql, (next_id, user_id, room_id)) + + if txn.rowcount == 0: + insert_sql = ( + "INSERT INTO room_tags_revisions (user_id, room_id, stream_id)" + " VALUES (?, ?, ?)" + ) + try: + txn.execute(insert_sql, (user_id, room_id, next_id)) + except database_engine.module.IntegrityError as e: + # Ignore insertion errors. It doesn't matter if the row wasn't + # inserted because if two updates happend concurrently the one + # with the higher stream_id will not be reported to a client + # unless the previous update has completed. It doesn't matter + # which stream_id ends up in the table, as long as it is higher + # than the id that the client has. + pass -- cgit 1.5.1 From a89b86dc473f5dc41a17207a17165b27d8f599ea Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 28 Oct 2015 16:45:57 +0000 Subject: Fix pyflakes errors --- synapse/rest/client/v2_alpha/tags.py | 2 ++ synapse/storage/tags.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py index c4a670c5a7..15c9347fc1 100644 --- a/synapse/rest/client/v2_alpha/tags.py +++ b/synapse/rest/client/v2_alpha/tags.py @@ -16,6 +16,7 @@ from ._base import client_v2_pattern from synapse.http.servlet import RestServlet +from synapse.api.errors import AuthError from twisted.internet import defer @@ -56,6 +57,7 @@ class TagServlet(RestServlet): PATTERN = client_v2_pattern( "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags/(?P[^/]*)" ) + def __init__(self, hs): super(TagServlet, self).__init__() self.auth = hs.get_auth() diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 507e60596f..64c65fc321 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -84,7 +84,7 @@ class TagsStore(SQLBaseStore): results = {} if room_ids: tags_by_room = yield self.get_tags_for_user(self, user_id) - for room_id in rooms_ids: + for room_id in room_ids: results[room_id] = tags_by_room[room_id] defer.returnValue(results) @@ -117,7 +117,7 @@ class TagsStore(SQLBaseStore): ) try: txn.execute(sql, (user_id, room_id, tag)) - except database_engine.module.IntegrityError as e: + except self.database_engine.module.IntegrityError: # Return early if the row is already in the table # and we don't need to bump the revision number of the # private_user_data. @@ -180,7 +180,7 @@ class TagsStore(SQLBaseStore): ) try: txn.execute(insert_sql, (user_id, room_id, next_id)) - except database_engine.module.IntegrityError as e: + except self.database_engine.module.IntegrityError: # Ignore insertion errors. 
It doesn't matter if the row wasn't # inserted because if two updates happend concurrently the one # with the higher stream_id will not be reported to a client -- cgit 1.5.1 From f40b0ed5e190a78ed6633505c4f437b6fddc41ee Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 29 Oct 2015 15:20:52 +0000 Subject: Inform the client of new room tags using v1 /events --- synapse/handlers/private_user_data.py | 46 +++++++++++++++++++++++++++++++++++ synapse/notifier.py | 2 +- synapse/rest/client/v2_alpha/tags.py | 14 ++++++++--- synapse/storage/tags.py | 16 +++++++++++- synapse/streams/events.py | 5 ++++ synapse/types.py | 22 ++++++++++------- 6 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 synapse/handlers/private_user_data.py (limited to 'synapse/storage') diff --git a/synapse/handlers/private_user_data.py b/synapse/handlers/private_user_data.py new file mode 100644 index 0000000000..1778c71325 --- /dev/null +++ b/synapse/handlers/private_user_data.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + + +class PrivateUserDataEventSource(object): + def __init__(self, hs): + self.store = hs.get_datastore() + + def get_current_key(self, direction='f'): + return self.store.get_max_private_user_data_stream_id() + + @defer.inlineCallbacks + def get_new_events_for_user(self, user, from_key, limit): + user_id = user.to_string() + last_stream_id = from_key + + current_stream_id = yield self.store.get_max_private_user_data_stream_id() + tags = yield self.store.get_updated_tags(user_id, last_stream_id) + + results = [] + for room_id, room_tags in tags.items(): + results.append({ + "type": "m.tag", + "content": {"tags": room_tags}, + "room_id": room_id, + }) + + defer.returnValue((results, current_stream_id)) + + @defer.inlineCallbacks + def get_pagination_rows(self, user, config, key): + defer.returnValue(([], config.to_id)) diff --git a/synapse/notifier.py b/synapse/notifier.py index f998fc83bf..a78ee3c1e7 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -270,7 +270,7 @@ class Notifier(object): @defer.inlineCallbacks def wait_for_events(self, user, rooms, timeout, callback, - from_token=StreamToken("s0", "0", "0", "0")): + from_token=StreamToken("s0", "0", "0", "0", "0")): """Wait until the callback returns a non empty response or the timeout fires. 
""" diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py index 15c9347fc1..486add9909 100644 --- a/synapse/rest/client/v2_alpha/tags.py +++ b/synapse/rest/client/v2_alpha/tags.py @@ -62,6 +62,7 @@ class TagServlet(RestServlet): super(TagServlet, self).__init__() self.auth = hs.get_auth() self.store = hs.get_datastore() + self.notifier = hs.get_notifier() @defer.inlineCallbacks def on_PUT(self, request, user_id, room_id, tag): @@ -69,9 +70,12 @@ class TagServlet(RestServlet): if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") - yield self.store.add_tag_to_room(user_id, room_id, tag) + max_id = yield self.store.add_tag_to_room(user_id, room_id, tag) + + yield self.notifier.on_new_event( + "private_user_data_key", max_id, users=[user_id] + ) - # TODO: poke the notifier. defer.returnValue((200, {})) @defer.inlineCallbacks @@ -80,7 +84,11 @@ class TagServlet(RestServlet): if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") - yield self.store.remove_tag_from_room(user_id, room_id, tag) + max_id = yield self.store.remove_tag_from_room(user_id, room_id, tag) + + yield self.notifier.on_new_event( + "private_user_data_key", max_id, users=[user_id] + ) # TODO: poke the notifier. defer.returnValue((200, {})) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 64c65fc321..2d5c49144a 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -31,6 +31,14 @@ class TagsStore(SQLBaseStore): "private_user_data_max_stream_id", "stream_id" ) + def get_max_private_user_data_stream_id(self): + """Get the current max stream id for the private user data stream + + Returns: + A deferred int. + """ + return self._private_user_data_id_gen.get_max_token(self) + @cached() def get_tags_for_user(self, user_id): """Get all the tags for a user. @@ -83,7 +91,7 @@ class TagsStore(SQLBaseStore): results = {} if room_ids: - tags_by_room = yield self.get_tags_for_user(self, user_id) + tags_by_room = yield self.get_tags_for_user(user_id) for room_id in room_ids: results[room_id] = tags_by_room[room_id] @@ -129,6 +137,9 @@ class TagsStore(SQLBaseStore): self.get_tags_for_user.invalidate((user_id,)) + result = yield self._private_user_data_id_gen.get_max_token(self) + defer.returnValue(result) + @defer.inlineCallbacks def remove_tag_from_room(self, user_id, room_id, tag): """Remove a tag from a room for a user. @@ -148,6 +159,9 @@ class TagsStore(SQLBaseStore): self.get_tags_for_user.invalidate((user_id,)) + result = yield self._private_user_data_id_gen.get_max_token(self) + defer.returnValue(result) + def _update_revision_txn(self, txn, user_id, room_id, next_id): """Update the latest revision of the tags for the given user and room. 
diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 699083ae12..f0d68b5bf2 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -21,6 +21,7 @@ from synapse.handlers.presence import PresenceEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource from synapse.handlers.receipts import ReceiptEventSource +from synapse.handlers.private_user_data import PrivateUserDataEventSource class EventSources(object): @@ -29,6 +30,7 @@ class EventSources(object): "presence": PresenceEventSource, "typing": TypingNotificationEventSource, "receipt": ReceiptEventSource, + "private_user_data": PrivateUserDataEventSource, } def __init__(self, hs): @@ -52,5 +54,8 @@ class EventSources(object): receipt_key=( yield self.sources["receipt"].get_current_key() ), + private_user_data_key=( + yield self.sources["private_user_data"].get_current_key() + ), ) defer.returnValue(token) diff --git a/synapse/types.py b/synapse/types.py index 9cffc33d27..8d3a8d88cc 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -98,10 +98,13 @@ class EventID(DomainSpecificString): class StreamToken( - namedtuple( - "Token", - ("room_key", "presence_key", "typing_key", "receipt_key") - ) + namedtuple("Token", ( + "room_key", + "presence_key", + "typing_key", + "receipt_key", + "private_user_data_key", + )) ): _SEPARATOR = "_" @@ -128,13 +131,14 @@ class StreamToken( else: return int(self.room_key[1:].split("-")[-1]) - def is_after(self, other_token): + def is_after(self, other): """Does this token contain events that the other doesn't?""" return ( - (other_token.room_stream_id < self.room_stream_id) - or (int(other_token.presence_key) < int(self.presence_key)) - or (int(other_token.typing_key) < int(self.typing_key)) - or (int(other_token.receipt_key) < int(self.receipt_key)) + (other.room_stream_id < self.room_stream_id) + or (int(other.presence_key) < int(self.presence_key)) + or (int(other.typing_key) < int(self.typing_key)) + or (int(other.receipt_key) < int(self.receipt_key)) + or (int(other.private_user_data_key) < int(self.private_user_data_key)) ) def copy_and_advance(self, key, new_value): -- cgit 1.5.1 From ddd8566f415ab3a6092aa4947e5d2aebf67109fc Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 2 Nov 2015 15:11:31 +0000 Subject: Store room tag content and return the content in the m.tag event --- synapse/handlers/message.py | 6 ++--- synapse/rest/client/v2_alpha/tags.py | 12 +++++++-- synapse/storage/schema/delta/25/tags.sql | 1 + synapse/storage/tags.py | 44 ++++++++++++++++++++------------ 4 files changed, 41 insertions(+), 22 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f156e5c84..0f947993d1 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -405,7 +405,6 @@ class MessageHandler(BaseHandler): tags = tags_by_room.get(event.room_id) if tags: private_user_data.append({ - "room_id": event.room_id, "type": "m.tag", "content": {"tags": tags}, }) @@ -466,7 +465,6 @@ class MessageHandler(BaseHandler): private_user_data.append({ "type": "m.tag", "content": {"tags": tags}, - "room_id": room_id, }) result["private_user_data"] = private_user_data @@ -499,8 +497,8 @@ class MessageHandler(BaseHandler): user_id, messages ) - start_token = StreamToken(token[0], 0, 0, 0) - end_token = StreamToken(token[1], 0, 0, 0) + start_token = StreamToken(token[0], 0, 0, 0, 0) + end_token = StreamToken(token[1], 0, 0, 0, 0) time_now = 
self.clock.time_msec() diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py index 486add9909..4e3f917fc5 100644 --- a/synapse/rest/client/v2_alpha/tags.py +++ b/synapse/rest/client/v2_alpha/tags.py @@ -16,12 +16,14 @@ from ._base import client_v2_pattern from synapse.http.servlet import RestServlet -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, SynapseError from twisted.internet import defer import logging +import simplejson as json + logger = logging.getLogger(__name__) @@ -70,7 +72,13 @@ class TagServlet(RestServlet): if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") - max_id = yield self.store.add_tag_to_room(user_id, room_id, tag) + try: + content_bytes = request.content.read() + body = json.loads(content_bytes) + except: + raise SynapseError(400, "Invalid tag JSON") + + max_id = yield self.store.add_tag_to_room(user_id, room_id, tag, body) yield self.notifier.on_new_event( "private_user_data_key", max_id, users=[user_id] diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/schema/delta/25/tags.sql index 168766dcf3..527424c998 100644 --- a/synapse/storage/schema/delta/25/tags.sql +++ b/synapse/storage/schema/delta/25/tags.sql @@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, -- The name of the tag. + content TEXT NOT NULL, -- The JSON content of the tag. CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) ); diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 2d5c49144a..34aa38c06a 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -18,6 +18,7 @@ from synapse.util.caches.descriptors import cached from twisted.internet import defer from .util.id_generators import StreamIdGenerator +import ujson as json import logging logger = logging.getLogger(__name__) @@ -52,14 +53,15 @@ class TagsStore(SQLBaseStore): """ deferred = self._simple_select_list( - "room_tags", {"user_id": user_id}, ["room_id", "tag"] + "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"] ) @deferred.addCallback def tags_by_room(rows): tags_by_room = {} for row in rows: - tags_by_room.setdefault(row["room_id"], []).append(row["tag"]) + room_tags = tags_by_room.setdefault(row["room_id"], {}) + room_tags[row["tag"]] = json.loads(row["content"]) return tags_by_room return deferred @@ -105,31 +107,41 @@ class TagsStore(SQLBaseStore): Returns: A deferred list of string tags. """ - return self._simple_select_onecol( + return self._simple_select_list( table="room_tags", keyvalues={"user_id": user_id, "room_id": room_id}, - retcol="tag", + retcols=("tag", "content"), desc="get_tags_for_room", - ) + ).addCallback(lambda rows: { + row["tag"]: json.loads(row["content"]) for row in rows + }) @defer.inlineCallbacks - def add_tag_to_room(self, user_id, room_id, tag): + def add_tag_to_room(self, user_id, room_id, tag, content): """Add a tag to a room for a user. + Args: + user_id(str): The user to add a tag for. + room_id(str): The room to add a tag for. + tag(str): The tag name to add. + content(dict): A json object to associate with the tag. Returns: A deferred that completes once the tag has been added. 
""" + content_json = json.dumps(content) + def add_tag_txn(txn, next_id): - sql = ( - "INSERT INTO room_tags (user_id, room_id, tag)" - " VALUES (?, ?, ?)" + self._simple_upsert_txn( + txn, + table="room_tags", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "tag": tag, + }, + values={ + "content": content_json, + } ) - try: - txn.execute(sql, (user_id, room_id, tag)) - except self.database_engine.module.IntegrityError: - # Return early if the row is already in the table - # and we don't need to bump the revision number of the - # private_user_data. - return self._update_revision_txn(txn, user_id, room_id, next_id) with (yield self._private_user_data_id_gen.get_next(self)) as next_id: -- cgit 1.5.1 From 771ca56c886dd08f707447cfff70acd3ba73e98c Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Mon, 2 Nov 2015 15:31:57 +0000 Subject: Remove more unused parameters --- synapse/handlers/room.py | 1 - synapse/handlers/sync.py | 1 - synapse/storage/stream.py | 3 +-- tests/storage/test_redaction.py | 4 ---- tests/storage/test_stream.py | 4 ---- tests/utils.py | 2 +- 6 files changed, 2 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 36878a6c20..9184dcd048 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -827,7 +827,6 @@ class RoomEventSource(object): user_id=user.to_string(), from_key=from_key, to_key=to_key, - room_id=None, limit=limit, ) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index eaa14f38df..4054efe555 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -342,7 +342,6 @@ class SyncHandler(BaseHandler): sync_config.user.to_string(), from_key=since_token.room_key, to_key=now_token.room_key, - room_id=None, limit=timeline_limit + 1, ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 15d4c2bf68..c728013f4c 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -158,8 +158,7 @@ class StreamStore(SQLBaseStore): defer.returnValue(results) @log_function - def get_room_events_stream(self, user_id, from_key, to_key, room_id, - limit=0): + def get_room_events_stream(self, user_id, from_key, to_key, limit=0): current_room_membership_sql = ( "SELECT m.room_id FROM room_memberships as m " " INNER JOIN current_state_events as c" diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index b57006fcb4..dbf9700e6a 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -120,7 +120,6 @@ class RedactionTestCase(unittest.TestCase): self.u_alice.to_string(), start, end, - None, # Is currently ignored ) self.assertEqual(1, len(results)) @@ -149,7 +148,6 @@ class RedactionTestCase(unittest.TestCase): self.u_alice.to_string(), start, end, - None, # Is currently ignored ) self.assertEqual(1, len(results)) @@ -199,7 +197,6 @@ class RedactionTestCase(unittest.TestCase): self.u_alice.to_string(), start, end, - None, # Is currently ignored ) self.assertEqual(1, len(results)) @@ -228,7 +225,6 @@ class RedactionTestCase(unittest.TestCase): self.u_alice.to_string(), start, end, - None, # Is currently ignored ) self.assertEqual(1, len(results)) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index a658a789aa..e5c2c5cc8e 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -68,7 +68,6 @@ class StreamStoreTestCase(unittest.TestCase): self.u_bob.to_string(), start, end, - None, # Is currently ignored ) 
self.assertEqual(1, len(results)) @@ -105,7 +104,6 @@ class StreamStoreTestCase(unittest.TestCase): self.u_alice.to_string(), start, end, - None, # Is currently ignored ) self.assertEqual(1, len(results)) @@ -147,7 +145,6 @@ class StreamStoreTestCase(unittest.TestCase): self.u_bob.to_string(), start, end, - None, # Is currently ignored ) # We should not get the message, as it happened *after* bob left. @@ -175,7 +172,6 @@ class StreamStoreTestCase(unittest.TestCase): self.u_bob.to_string(), start, end, - None, # Is currently ignored ) # We should not get the message, as it happened *after* bob left. diff --git a/tests/utils.py b/tests/utils.py index 4da51291a4..ca2c33cf8e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -335,7 +335,7 @@ class MemoryDataStore(object): ] def get_room_events_stream(self, user_id=None, from_key=None, to_key=None, - room_id=None, limit=0, with_feedback=False): + limit=0, with_feedback=False): return ([], from_key) # TODO def get_joined_hosts_for_room(self, room_id): -- cgit 1.5.1 From 5897e773fd50733fa448b36275130e5441d36f31 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 3 Nov 2015 14:27:35 +0000 Subject: Spell "deferred" more correctly --- synapse/storage/tags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 34aa38c06a..641ea250f0 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -156,7 +156,7 @@ class TagsStore(SQLBaseStore): def remove_tag_from_room(self, user_id, room_id, tag): """Remove a tag from a room for a user. Returns: - A deffered that completes once the tag has been removed + A deferred that completes once the tag has been removed """ def remove_tag_txn(txn, next_id): sql = ( -- cgit 1.5.1 From 7ce264ce5f1b2409081446bd8e1a4adc63675e06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 3 Nov 2015 16:23:35 +0000 Subject: Fix broken cache for getting retry times. This meant we retried remote destinations way more frequently than we should --- synapse/federation/transaction_queue.py | 47 ++++++++++++++++---------------- synapse/storage/transactions.py | 48 +++++++++++---------------------- 2 files changed, 40 insertions(+), 55 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 32fa5e8c15..2a7dd343f3 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -202,19 +202,6 @@ class TransactionQueue(object): @defer.inlineCallbacks @log_function def _attempt_new_transaction(self, destination): - if destination in self.pending_transactions: - # XXX: pending_transactions can get stuck on by a never-ending - # request at which point pending_pdus_by_dest just keeps growing. 
- # we need application-layer timeouts of some flavour of these - # requests - logger.debug( - "TX [%s] Transaction already in progress", - destination - ) - return - - logger.debug("TX [%s] _attempt_new_transaction", destination) - # list of (pending_pdu, deferred, order) pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) @@ -228,20 +215,34 @@ class TransactionQueue(object): logger.debug("TX [%s] Nothing to send", destination) return - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[2]) - - pdus = [x[0] for x in pending_pdus] - edus = [x[0] for x in pending_edus] - failures = [x[0].get_dict() for x in pending_failures] - deferreds = [ - x[1] - for x in pending_pdus + pending_edus + pending_failures - ] + if destination in self.pending_transactions: + # XXX: pending_transactions can get stuck on by a never-ending + # request at which point pending_pdus_by_dest just keeps growing. + # we need application-layer timeouts of some flavour of these + # requests + logger.debug( + "TX [%s] Transaction already in progress", + destination + ) + return + # NOTE: Nothing should be between the above check and the insertion below try: self.pending_transactions[destination] = 1 + logger.debug("TX [%s] _attempt_new_transaction", destination) + + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[2]) + + pdus = [x[0] for x in pending_pdus] + edus = [x[0] for x in pending_edus] + failures = [x[0].get_dict() for x in pending_failures] + deferreds = [ + x[1] + for x in pending_pdus + pending_edus + pending_failures + ] + txn_id = str(self._next_txn_id) limiter = yield get_retry_limiter( diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 15695e9831..4e0d7c9774 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -253,16 +253,6 @@ class TransactionStore(SQLBaseStore): retry_interval (int) - how long until next retry in ms """ - # As this is the new value, we might as well prefill the cache - self.get_destination_retry_timings.prefill( - destination, - { - "destination": destination, - "retry_last_ts": retry_last_ts, - "retry_interval": retry_interval - }, - ) - # XXX: we could chose to not bother persisting this if our cache thinks # this is a NOOP return self.runInteraction( @@ -275,31 +265,25 @@ class TransactionStore(SQLBaseStore): def _set_destination_retry_timings(self, txn, destination, retry_last_ts, retry_interval): - query = ( - "UPDATE destinations" - " SET retry_last_ts = ?, retry_interval = ?" - " WHERE destination = ?" - ) + txn.call_after(self.get_destination_retry_timings.invalidate, (destination,)) - txn.execute( - query, - ( - retry_last_ts, retry_interval, destination, - ) + self._simple_upsert_txn( + txn, + "destinations", + keyvalues={ + "destination": destination, + }, + values={ + "retry_last_ts": retry_last_ts, + "retry_interval": retry_interval, + }, + insertion_values={ + "destination": destination, + "retry_last_ts": retry_last_ts, + "retry_interval": retry_interval, + } ) - if txn.rowcount == 0: - # destination wasn't already in table. Insert it. - self._simple_insert_txn( - txn, - table="destinations", - values={ - "destination": destination, - "retry_last_ts": retry_last_ts, - "retry_interval": retry_interval, - } - ) - def get_destinations_needing_retry(self): """Get all destinations which are due a retry for sending a transaction. 
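The fix here has two parts. First, the cache entry is no longer prefilled before the write; txn.call_after defers the invalidation until the transaction has actually committed, so a failed or retried transaction can no longer leave the cached retry timings out of step with the table. Second, the racy UPDATE-then-INSERT pair is folded into a single _simple_upsert_txn call. For reference, the generic shape of such an upsert helper is roughly the sketch below; this is an assumption about its behaviour for illustration, not a copy of the implementation in _base.py.

    def simple_upsert_txn(txn, table, keyvalues, values, insertion_values=None):
        # Try an UPDATE against the natural key first.
        sql = "UPDATE %s SET %s WHERE %s" % (
            table,
            ", ".join("%s = ?" % k for k in values),
            " AND ".join("%s = ?" % k for k in keyvalues),
        )
        txn.execute(sql, list(values.values()) + list(keyvalues.values()))
        if txn.rowcount == 0:
            # No existing row: INSERT one, merging the key columns, the
            # updated columns and any insert-only defaults.
            allvalues = dict(insertion_values or {})
            allvalues.update(keyvalues)
            allvalues.update(values)
            sql = "INSERT INTO %s (%s) VALUES (%s)" % (
                table,
                ", ".join(allvalues),
                ", ".join("?" for _ in allvalues),
            )
            txn.execute(sql, list(allvalues.values()))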
-- cgit 1.5.1 From f522f50a08d48042d103c98dbc3cfd4872b7d981 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Wed, 4 Nov 2015 17:29:07 +0000 Subject: Allow guests to register and call /events?room_id= This follows the same flows-based flow as regular registration, but as the only implemented flow has no requirements, it auto-succeeds. In the future, other flows (e.g. captcha) may be required, so clients should treat this like the regular registration flow choices. --- synapse/api/auth.py | 95 ++++++++++++++++------------- synapse/api/errors.py | 1 + synapse/config/registration.py | 6 ++ synapse/handlers/_base.py | 75 ++++++++++++++--------- synapse/handlers/auth.py | 5 +- synapse/handlers/message.py | 46 +++++++------- synapse/handlers/register.py | 12 ++-- synapse/rest/client/v1/admin.py | 2 +- synapse/rest/client/v1/directory.py | 4 +- synapse/rest/client/v1/events.py | 4 +- synapse/rest/client/v1/initial_sync.py | 2 +- synapse/rest/client/v1/presence.py | 8 +-- synapse/rest/client/v1/profile.py | 4 +- synapse/rest/client/v1/push_rule.py | 6 +- synapse/rest/client/v1/pusher.py | 2 +- synapse/rest/client/v1/room.py | 27 ++++---- synapse/rest/client/v1/voip.py | 2 +- synapse/rest/client/v2_alpha/account.py | 6 +- synapse/rest/client/v2_alpha/filter.py | 4 +- synapse/rest/client/v2_alpha/keys.py | 6 +- synapse/rest/client/v2_alpha/receipts.py | 2 +- synapse/rest/client/v2_alpha/register.py | 27 +++++++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/rest/client/v2_alpha/tags.py | 6 +- synapse/rest/media/v0/content_repository.py | 2 +- synapse/rest/media/v1/upload_resource.py | 2 +- synapse/storage/registration.py | 15 ++--- tests/api/test_auth.py | 25 +++++++- tests/rest/client/v1/test_presence.py | 10 +-- tests/rest/client/v1/test_profile.py | 4 +- tests/rest/client/v1/test_rooms.py | 21 ++++--- tests/rest/client/v1/test_typing.py | 3 +- tests/rest/client/v2_alpha/__init__.py | 3 +- 33 files changed, 272 insertions(+), 167 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 88445fe999..dfbbc5a1cd 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -49,6 +49,7 @@ class Auth(object): self.TOKEN_NOT_FOUND_HTTP_STATUS = 401 self._KNOWN_CAVEAT_PREFIXES = set([ "gen = ", + "guest = ", "type = ", "time < ", "user_id = ", @@ -183,15 +184,11 @@ class Auth(object): defer.returnValue(member) @defer.inlineCallbacks - def check_user_was_in_room(self, room_id, user_id, current_state=None): + def check_user_was_in_room(self, room_id, user_id): """Check if the user was in the room at some point. Args: room_id(str): The room to check. user_id(str): The user to check. - current_state(dict): Optional map of the current state of the room. - If provided then that map is used to check whether they are a - member of the room. Otherwise the current membership is - loaded from the database. Raises: AuthError if the user was never in the room. Returns: @@ -199,17 +196,11 @@ class Auth(object): room. This will be the join event if they are currently joined to the room. This will be the leave event if they have left the room. 
""" - if current_state: - member = current_state.get( - (EventTypes.Member, user_id), - None - ) - else: - member = yield self.state.get_current_state( - room_id=room_id, - event_type=EventTypes.Member, - state_key=user_id - ) + member = yield self.state.get_current_state( + room_id=room_id, + event_type=EventTypes.Member, + state_key=user_id + ) membership = member.membership if member else None if membership not in (Membership.JOIN, Membership.LEAVE): @@ -497,7 +488,7 @@ class Auth(object): return default @defer.inlineCallbacks - def get_user_by_req(self, request): + def get_user_by_req(self, request, allow_guest=False): """ Get a registered user's ID. Args: @@ -535,7 +526,7 @@ class Auth(object): request.authenticated_entity = user_id - defer.returnValue((UserID.from_string(user_id), "")) + defer.returnValue((UserID.from_string(user_id), "", False)) return except KeyError: pass # normal users won't have the user_id query parameter set. @@ -543,6 +534,7 @@ class Auth(object): user_info = yield self._get_user_by_access_token(access_token) user = user_info["user"] token_id = user_info["token_id"] + is_guest = user_info["is_guest"] ip_addr = self.hs.get_ip_from_request(request) user_agent = request.requestHeaders.getRawHeaders( @@ -557,9 +549,14 @@ class Auth(object): user_agent=user_agent ) + if is_guest and not allow_guest: + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) + request.authenticated_entity = user.to_string() - defer.returnValue((user, token_id,)) + defer.returnValue((user, token_id, is_guest,)) except KeyError: raise AuthError( self.TOKEN_NOT_FOUND_HTTP_STATUS, "Missing access token.", @@ -592,31 +589,45 @@ class Auth(object): self._validate_macaroon(macaroon) user_prefix = "user_id = " + user = None + guest = False for caveat in macaroon.caveats: if caveat.caveat_id.startswith(user_prefix): user = UserID.from_string(caveat.caveat_id[len(user_prefix):]) - # This codepath exists so that we can actually return a - # token ID, because we use token IDs in place of device - # identifiers throughout the codebase. - # TODO(daniel): Remove this fallback when device IDs are - # properly implemented. - ret = yield self._look_up_user_by_access_token(macaroon_str) - if ret["user"] != user: - logger.error( - "Macaroon user (%s) != DB user (%s)", - user, - ret["user"] - ) - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "User mismatch in macaroon", - errcode=Codes.UNKNOWN_TOKEN - ) - defer.returnValue(ret) - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, "No user caveat in macaroon", - errcode=Codes.UNKNOWN_TOKEN - ) + elif caveat.caveat_id == "guest = true": + guest = True + + if user is None: + raise AuthError( + self.TOKEN_NOT_FOUND_HTTP_STATUS, "No user caveat in macaroon", + errcode=Codes.UNKNOWN_TOKEN + ) + + if guest: + ret = { + "user": user, + "is_guest": True, + "token_id": None, + } + else: + # This codepath exists so that we can actually return a + # token ID, because we use token IDs in place of device + # identifiers throughout the codebase. + # TODO(daniel): Remove this fallback when device IDs are + # properly implemented. 
+ ret = yield self._look_up_user_by_access_token(macaroon_str) + if ret["user"] != user: + logger.error( + "Macaroon user (%s) != DB user (%s)", + user, + ret["user"] + ) + raise AuthError( + self.TOKEN_NOT_FOUND_HTTP_STATUS, + "User mismatch in macaroon", + errcode=Codes.UNKNOWN_TOKEN + ) + defer.returnValue(ret) except (pymacaroons.exceptions.MacaroonException, TypeError, ValueError): raise AuthError( self.TOKEN_NOT_FOUND_HTTP_STATUS, "Invalid macaroon passed.", @@ -629,6 +640,7 @@ class Auth(object): v.satisfy_exact("type = access") v.satisfy_general(lambda c: c.startswith("user_id = ")) v.satisfy_general(self._verify_expiry) + v.satisfy_exact("guest = true") v.verify(macaroon, self.hs.config.macaroon_secret_key) v = pymacaroons.Verifier() @@ -666,6 +678,7 @@ class Auth(object): user_info = { "user": UserID.from_string(ret.get("name")), "token_id": ret.get("token_id", None), + "is_guest": False, } defer.returnValue(user_info) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index b3fea27d0e..d4037b3d55 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -33,6 +33,7 @@ class Codes(object): NOT_FOUND = "M_NOT_FOUND" MISSING_TOKEN = "M_MISSING_TOKEN" UNKNOWN_TOKEN = "M_UNKNOWN_TOKEN" + GUEST_ACCESS_FORBIDDEN = "M_GUEST_ACCESS_FORBIDDEN" LIMIT_EXCEEDED = "M_LIMIT_EXCEEDED" CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED" CAPTCHA_INVALID = "M_CAPTCHA_INVALID" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index f5ef36a9f4..dca391f7af 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -34,6 +34,7 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.macaroon_secret_key = config.get("macaroon_secret_key") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) + self.allow_guest_access = config.get("allow_guest_access", False) def default_config(self, **kwargs): registration_shared_secret = random_string_with_symbols(50) @@ -54,6 +55,11 @@ class RegistrationConfig(Config): # Larger numbers increase the work factor needed to generate the hash. # The default number of rounds is 12. bcrypt_rounds: 12 + + # Allows users to register as guests without a password/email/etc, and + # participate in rooms hosted on this server which have been made + # accessible to anonymous users. + allow_guest_access: False """ % locals() def add_arguments(self, parser): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 6a26cb1879..6873a4575d 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -47,37 +47,23 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, events): - event_id_to_state = yield self.store.get_state_for_events( - frozenset(e.event_id for e in events), - types=( - (EventTypes.RoomHistoryVisibility, ""), - (EventTypes.Member, user_id), - ) - ) + def _filter_events_for_client(self, user_id, events, is_guest=False): + # Assumes that user has at some point joined the room if not is_guest. 
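# A compact restatement of what allowed() below implements, inferred
# from the code rather than stated in the patch: guests may only see
# events while the room's history visibility is "world_readable"; for
# registered users, a currently-joined member sees everything, "shared"
# history is visible to anyone who was ever in the room, and "joined"
# history only covers events sent while the user's membership was join.
# If any event in the requested window is filtered out for a guest, the
# whole request fails with a 403 rather than returning a partial view.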
- def allowed(event, state): - if event.type == EventTypes.RoomHistoryVisibility: + def allowed(event, membership, visibility): + if visibility == "world_readable": return True - membership_ev = state.get((EventTypes.Member, user_id), None) - if membership_ev: - membership = membership_ev.membership - else: - membership = Membership.LEAVE + if is_guest: + return False if membership == Membership.JOIN: return True - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - else: - visibility = "shared" + if event.type == EventTypes.RoomHistoryVisibility: + return not is_guest - if visibility == "public": - return True - elif visibility == "shared": + if visibility == "shared": return True elif visibility == "joined": return membership == Membership.JOIN @@ -86,11 +72,44 @@ class BaseHandler(object): return True - defer.returnValue([ - event - for event in events - if allowed(event, event_id_to_state[event.event_id]) - ]) + event_id_to_state = yield self.store.get_state_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) + ) + + events_to_return = [] + for event in events: + state = event_id_to_state[event.event_id] + + membership_event = state.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + else: + membership = None + + visibility_event = state.get((EventTypes.RoomHistoryVisibility, ""), None) + if visibility_event: + visibility = visibility_event.content.get("history_visibility", "shared") + else: + visibility = "shared" + + should_include = allowed(event, membership, visibility) + if should_include: + events_to_return.append(event) + + if is_guest and len(events_to_return) < len(events): + # This indicates that some events in the requested range were not + # visible to guest users. To be safe, we reject the entire request, + # so that we don't have to worry about interpreting visibility + # boundaries. + raise AuthError(403, "User %s does not have permission" % ( + user_id + )) + + defer.returnValue(events_to_return) def ratelimit(self, user_id): time_now = self.clock.time() diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 055d395b20..1b11dbdffd 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -372,12 +372,15 @@ class AuthHandler(BaseHandler): yield self.store.add_refresh_token_to_user(user_id, refresh_token) defer.returnValue(refresh_token) - def generate_access_token(self, user_id): + def generate_access_token(self, user_id, extra_caveats=None): + extra_caveats = extra_caveats or [] macaroon = self._generate_base_macaroon(user_id) macaroon.add_first_party_caveat("type = access") now = self.hs.get_clock().time_msec() expiry = now + (60 * 60 * 1000) macaroon.add_first_party_caveat("time < %d" % (expiry,)) + for caveat in extra_caveats: + macaroon.add_first_party_caveat(caveat) return macaroon.serialize() def generate_refresh_token(self, user_id): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 0f947993d1..687e1527f7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -71,20 +71,20 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def get_messages(self, user_id=None, room_id=None, pagin_config=None, - as_client_event=True): + as_client_event=True, is_guest=False): """Get messages in a room. Args: user_id (str): The user requesting messages. 
room_id (str): The room they want messages from. pagin_config (synapse.api.streams.PaginationConfig): The pagination - config rules to apply, if any. + config rules to apply, if any. as_client_event (bool): True to get events in client-server format. + is_guest (bool): Whether the requesting user is a guest (as opposed + to a fully registered user). Returns: dict: Pagination API results """ - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) - data_source = self.hs.get_event_sources().sources["room"] if pagin_config.from_token: @@ -107,23 +107,27 @@ class MessageHandler(BaseHandler): source_config = pagin_config.get_source_config("room") - if member_event.membership == Membership.LEAVE: - # If they have left the room then clamp the token to be before - # they left the room - leave_token = yield self.store.get_topological_token_for_event( - member_event.event_id - ) - leave_token = RoomStreamToken.parse(leave_token) - if leave_token.topological < room_token.topological: - source_config.from_key = str(leave_token) - - if source_config.direction == "f": - if source_config.to_key is None: - source_config.to_key = str(leave_token) - else: - to_token = RoomStreamToken.parse(source_config.to_key) - if leave_token.topological < to_token.topological: + if not is_guest: + member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + if member_event.membership == Membership.LEAVE: + # If they have left the room then clamp the token to be before + # they left the room. + # If they're a guest, we'll just 403 them if they're asking for + # events they can't see. + leave_token = yield self.store.get_topological_token_for_event( + member_event.event_id + ) + leave_token = RoomStreamToken.parse(leave_token) + if leave_token.topological < room_token.topological: + source_config.from_key = str(leave_token) + + if source_config.direction == "f": + if source_config.to_key is None: source_config.to_key = str(leave_token) + else: + to_token = RoomStreamToken.parse(source_config.to_key) + if leave_token.topological < to_token.topological: + source_config.to_key = str(leave_token) yield self.hs.get_handlers().federation_handler.maybe_backfill( room_id, room_token.topological @@ -146,7 +150,7 @@ class MessageHandler(BaseHandler): "end": next_token.to_string(), }) - events = yield self._filter_events_for_client(user_id, events) + events = yield self._filter_events_for_client(user_id, events, is_guest=is_guest) time_now = self.clock.time_msec() diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index ef4081e3fe..493a087031 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -64,7 +64,7 @@ class RegistrationHandler(BaseHandler): ) @defer.inlineCallbacks - def register(self, localpart=None, password=None): + def register(self, localpart=None, password=None, generate_token=True): """Registers a new client on the server. 
Args: @@ -89,7 +89,9 @@ class RegistrationHandler(BaseHandler): user = UserID(localpart, self.hs.hostname) user_id = user.to_string() - token = self.auth_handler().generate_access_token(user_id) + token = None + if generate_token: + token = self.auth_handler().generate_access_token(user_id) yield self.store.register( user_id=user_id, token=token, @@ -102,14 +104,14 @@ class RegistrationHandler(BaseHandler): attempts = 0 user_id = None token = None - while not user_id and not token: + while not user_id: try: localpart = self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() yield self.check_user_id_is_valid(user_id) - - token = self.auth_handler().generate_access_token(user_id) + if generate_token: + token = self.auth_handler().generate_access_token(user_id) yield self.store.register( user_id=user_id, token=token, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 504b63eab4..bdde43864c 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -31,7 +31,7 @@ class WhoisRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): target_user = UserID.from_string(user_id) - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) is_admin = yield self.auth.is_server_admin(auth_user) if not is_admin and target_user != auth_user: diff --git a/synapse/rest/client/v1/directory.py b/synapse/rest/client/v1/directory.py index 4dcda57c1b..240eedac75 100644 --- a/synapse/rest/client/v1/directory.py +++ b/synapse/rest/client/v1/directory.py @@ -69,7 +69,7 @@ class ClientDirectoryServer(ClientV1RestServlet): try: # try to auth as a user - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) try: user_id = user.to_string() yield dir_handler.create_association( @@ -116,7 +116,7 @@ class ClientDirectoryServer(ClientV1RestServlet): # fallback to default user behaviour if they aren't an AS pass - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) is_admin = yield self.auth.is_server_admin(user) if not is_admin: diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/v1/events.py index 582148b659..4073b0d2d1 100644 --- a/synapse/rest/client/v1/events.py +++ b/synapse/rest/client/v1/events.py @@ -34,7 +34,7 @@ class EventStreamRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) try: handler = self.handlers.event_stream_handler pagin_config = PaginationConfig.from_request(request) @@ -71,7 +71,7 @@ class EventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, event_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) handler = self.handlers.event_handler event = yield handler.get_event(auth_user, event_id) diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py index 52c7943400..856a70f297 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/v1/initial_sync.py @@ -25,7 +25,7 @@ class InitialSyncRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) 
as_client_event = "raw" not in request.args pagination_config = PaginationConfig.from_request(request) handler = self.handlers.message_handler diff --git a/synapse/rest/client/v1/presence.py b/synapse/rest/client/v1/presence.py index a770efd841..6fe5d19a22 100644 --- a/synapse/rest/client/v1/presence.py +++ b/synapse/rest/client/v1/presence.py @@ -32,7 +32,7 @@ class PresenceStatusRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) state = yield self.handlers.presence_handler.get_state( @@ -42,7 +42,7 @@ class PresenceStatusRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) state = {} @@ -77,7 +77,7 @@ class PresenceListRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) if not self.hs.is_mine(user): @@ -97,7 +97,7 @@ class PresenceListRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) if not self.hs.is_mine(user): diff --git a/synapse/rest/client/v1/profile.py b/synapse/rest/client/v1/profile.py index fdde88a60d..6b379e4e5f 100644 --- a/synapse/rest/client/v1/profile.py +++ b/synapse/rest/client/v1/profile.py @@ -37,7 +37,7 @@ class ProfileDisplaynameRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) try: @@ -70,7 +70,7 @@ class ProfileAvatarURLRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user = UserID.from_string(user_id) try: diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py index bd759a2589..b0870db1ac 100644 --- a/synapse/rest/client/v1/push_rule.py +++ b/synapse/rest/client/v1/push_rule.py @@ -43,7 +43,7 @@ class PushRuleRestServlet(ClientV1RestServlet): except InvalidRuleException as e: raise SynapseError(400, e.message) - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) if '/' in spec['rule_id'] or '\\' in spec['rule_id']: raise SynapseError(400, "rule_id may not contain slashes") @@ -92,7 +92,7 @@ class PushRuleRestServlet(ClientV1RestServlet): def on_DELETE(self, request): spec = _rule_spec_from_path(request.postpath) - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) namespaced_rule_id = _namespaced_rule_id_from_spec(spec) @@ -109,7 +109,7 @@ class PushRuleRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) # we build up the full structure and then decide which bits 
of it # to send which means doing unnecessary work sometimes but is diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py index 3aabc93b8b..a110c0a4f0 100644 --- a/synapse/rest/client/v1/pusher.py +++ b/synapse/rest/client/v1/pusher.py @@ -27,7 +27,7 @@ class PusherRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 2dcaee86cd..0876e593c5 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -62,7 +62,7 @@ class RoomCreateRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) room_config = self.get_room_config(request) info = yield self.make_room(room_config, auth_user, None) @@ -125,7 +125,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id, event_type, state_key): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) msg_handler = self.handlers.message_handler data = yield msg_handler.get_room_data( @@ -143,7 +143,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) @@ -175,7 +175,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, room_id, event_type, txn_id=None): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) msg_handler = self.handlers.message_handler @@ -220,7 +220,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, room_identifier, txn_id=None): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) # the identifier could be a room alias or a room id. 
Try one then the # other if it fails to parse, without swallowing other valid @@ -289,7 +289,7 @@ class RoomMemberListRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): # TODO support Pagination stream API (limit/tokens) - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) handler = self.handlers.message_handler events = yield handler.get_state_events( room_id=room_id, @@ -325,7 +325,7 @@ class RoomMessageListRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): - user, _ = yield self.auth.get_user_by_req(request) + user, _, is_guest = yield self.auth.get_user_by_req(request, allow_guest=True) pagination_config = PaginationConfig.from_request( request, default_limit=10, ) @@ -334,6 +334,7 @@ class RoomMessageListRestServlet(ClientV1RestServlet): msgs = yield handler.get_messages( room_id=room_id, user_id=user.to_string(), + is_guest=is_guest, pagin_config=pagination_config, as_client_event=as_client_event ) @@ -347,7 +348,7 @@ class RoomStateRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) handler = self.handlers.message_handler # Get all the current state for this room events = yield handler.get_state_events( @@ -363,7 +364,7 @@ class RoomInitialSyncRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) pagination_config = PaginationConfig.from_request(request) content = yield self.handlers.message_handler.room_initial_sync( room_id=room_id, @@ -443,7 +444,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, room_id, membership_action, txn_id=None): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) @@ -524,7 +525,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, room_id, event_id, txn_id=None): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) msg_handler = self.handlers.message_handler @@ -564,7 +565,7 @@ class RoomTypingRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_PUT(self, request, room_id, user_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) room_id = urllib.unquote(room_id) target_user = UserID.from_string(urllib.unquote(user_id)) @@ -597,7 +598,7 @@ class SearchRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) content = _parse_json(request) diff --git a/synapse/rest/client/v1/voip.py b/synapse/rest/client/v1/voip.py index 0a863e1c61..eb7c57cade 100644 --- a/synapse/rest/client/v1/voip.py +++ b/synapse/rest/client/v1/voip.py @@ -28,7 +28,7 @@ class VoipRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) turnUris = self.hs.config.turn_uris 
turnSecret = self.hs.config.turn_shared_secret diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 4692ba413c..1970ad3458 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -55,7 +55,7 @@ class PasswordRestServlet(RestServlet): if LoginType.PASSWORD in result: # if using password, they should also be logged in - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if auth_user.to_string() != result[LoginType.PASSWORD]: raise LoginError(400, "", Codes.UNKNOWN) user_id = auth_user.to_string() @@ -102,7 +102,7 @@ class ThreepidRestServlet(RestServlet): def on_GET(self, request): yield run_on_reactor() - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) threepids = yield self.hs.get_datastore().user_get_threepids( auth_user.to_string() @@ -120,7 +120,7 @@ class ThreepidRestServlet(RestServlet): raise SynapseError(400, "Missing param", Codes.MISSING_PARAM) threePidCreds = body['threePidCreds'] - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) threepid = yield self.identity_handler.threepid_from_creds(threePidCreds) diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py index f8f91b63f5..97956a4b91 100644 --- a/synapse/rest/client/v2_alpha/filter.py +++ b/synapse/rest/client/v2_alpha/filter.py @@ -40,7 +40,7 @@ class GetFilterRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id, filter_id): target_user = UserID.from_string(user_id) - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if target_user != auth_user: raise AuthError(403, "Cannot get filters for other users") @@ -76,7 +76,7 @@ class CreateFilterRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request, user_id): target_user = UserID.from_string(user_id) - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if target_user != auth_user: raise AuthError(403, "Cannot create filters for other users") diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index a1f4423101..820d33336f 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -64,7 +64,7 @@ class KeyUploadServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request, device_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user_id = auth_user.to_string() # TODO: Check that the device_id matches that in the authentication # or derive the device_id from the authentication instead. 
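A note on the pattern repeated across all of these hunks: `get_user_by_req` now returns a three-tuple rather than a pair. A minimal sketch of the new calling convention, assuming the `ClientV1RestServlet`/`self.auth` plumbing used throughout this codebase (the servlet itself is hypothetical, for illustration only):

    from twisted.internet import defer

    class ExampleRestServlet(ClientV1RestServlet):  # hypothetical servlet
        @defer.inlineCallbacks
        def on_GET(self, request):
            # The tuple is now (user, token_id, is_guest); callers that do not
            # care about guests simply discard the extra element with "_".
            user, token_id, is_guest = yield self.auth.get_user_by_req(
                request, allow_guest=True  # omit to keep rejecting guest tokens
            )
            body = {"user_id": user.to_string(), "is_guest": is_guest}
            defer.returnValue((200, body))

Only endpoints that pass `allow_guest=True` (such as the message-list servlet above) opt in to guest access; everywhere else the third element is simply ignored.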
@@ -109,7 +109,7 @@ class KeyUploadServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, device_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) user_id = auth_user.to_string() result = yield self.store.count_e2e_one_time_keys(user_id, device_id) @@ -181,7 +181,7 @@ class KeyQueryServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id, device_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) auth_user_id = auth_user.to_string() user_id = user_id if user_id else auth_user_id device_ids = [device_id] if device_id else [] diff --git a/synapse/rest/client/v2_alpha/receipts.py b/synapse/rest/client/v2_alpha/receipts.py index b107b7ce17..788acd4adb 100644 --- a/synapse/rest/client/v2_alpha/receipts.py +++ b/synapse/rest/client/v2_alpha/receipts.py @@ -40,7 +40,7 @@ class ReceiptRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request, room_id, receipt_type, event_id): - user, _ = yield self.auth.get_user_by_req(request) + user, _, _ = yield self.auth.get_user_by_req(request) if receipt_type != "m.read": raise SynapseError(400, "Receipt type must be 'm.read'") diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 1ba2f29711..f899376311 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.constants import LoginType -from synapse.api.errors import SynapseError, Codes +from synapse.api.errors import SynapseError, Codes, UnrecognizedRequestError from synapse.http.servlet import RestServlet from ._base import client_v2_pattern, parse_json_dict_from_request @@ -55,6 +55,19 @@ class RegisterRestServlet(RestServlet): def on_POST(self, request): yield run_on_reactor() + kind = "user" + if "kind" in request.args: + kind = request.args["kind"][0] + + if kind == "guest": + ret = yield self._do_guest_registration() + defer.returnValue(ret) + return + elif kind != "user": + raise UnrecognizedRequestError( + "Do not understand membership kind: %s" % (kind,) + ) + if '/register/email/requestToken' in request.path: ret = yield self.onEmailTokenRequest(request) defer.returnValue(ret) @@ -236,6 +249,18 @@ class RegisterRestServlet(RestServlet): ret = yield self.identity_handler.requestEmailToken(**body) defer.returnValue((200, ret)) + @defer.inlineCallbacks + def _do_guest_registration(self): + if not self.hs.config.allow_guest_access: + defer.returnValue((403, "Guest access is disabled")) + user_id, _ = yield self.registration_handler.register(generate_token=False) + access_token = self.auth_handler.generate_access_token(user_id, ["guest = true"]) + defer.returnValue((200, { + "user_id": user_id, + "access_token": access_token, + "home_server": self.hs.hostname, + })) + def register_servlets(hs, http_server): RegisterRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 32a1087c91..d24507effa 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -81,7 +81,7 @@ class SyncRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request): - user, token_id = yield self.auth.get_user_by_req(request) + user, token_id, _ = yield self.auth.get_user_by_req(request) timeout = parse_integer(request, "timeout", default=0) since = 
parse_string(request, "since") diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py index dcfe6bd20e..35482ae6a6 100644 --- a/synapse/rest/client/v2_alpha/tags.py +++ b/synapse/rest/client/v2_alpha/tags.py @@ -42,7 +42,7 @@ class TagListServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id, room_id): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if user_id != auth_user.to_string(): raise AuthError(403, "Cannot get tags for other users.") @@ -68,7 +68,7 @@ class TagServlet(RestServlet): @defer.inlineCallbacks def on_PUT(self, request, user_id, room_id, tag): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") @@ -88,7 +88,7 @@ class TagServlet(RestServlet): @defer.inlineCallbacks def on_DELETE(self, request, user_id, room_id, tag): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") diff --git a/synapse/rest/media/v0/content_repository.py b/synapse/rest/media/v0/content_repository.py index c28dc86cd7..e4fa8c4647 100644 --- a/synapse/rest/media/v0/content_repository.py +++ b/synapse/rest/media/v0/content_repository.py @@ -66,7 +66,7 @@ class ContentRepoResource(resource.Resource): @defer.inlineCallbacks def map_request_to_name(self, request): # auth the user - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) # namespace all file uploads on the user prefix = base64.urlsafe_b64encode( diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 6abaf56b25..7d61596082 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -70,7 +70,7 @@ class UploadResource(BaseMediaResource): @request_handler @defer.inlineCallbacks def _async_render_POST(self, request): - auth_user, _ = yield self.auth.get_user_by_req(request) + auth_user, _, _ = yield self.auth.get_user_by_req(request) # TODO: The checks here are a bit late. 
The content will have # already been uploaded to a tmp file at this point content_length = request.getHeader("Content-Length") diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index b454dd5b3a..2e5eddd259 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -102,13 +102,14 @@ class RegistrationStore(SQLBaseStore): 400, "User ID already taken.", errcode=Codes.USER_IN_USE ) - # it's possible for this to get a conflict, but only for a single user - # since tokens are namespaced based on their user ID - txn.execute( - "INSERT INTO access_tokens(id, user_id, token)" - " VALUES (?,?,?)", - (next_id, user_id, token,) - ) + if token: + # it's possible for this to get a conflict, but only for a single user + # since tokens are namespaced based on their user ID + txn.execute( + "INSERT INTO access_tokens(id, user_id, token)" + " VALUES (?,?,?)", + (next_id, user_id, token,) + ) def get_user_by_id(self, user_id): return self._simple_select_one( diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index c96273480d..70d928defe 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -51,7 +51,7 @@ class AuthTestCase(unittest.TestCase): request = Mock(args={}) request.args["access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = Mock(return_value=[""]) - (user, _) = yield self.auth.get_user_by_req(request) + (user, _, _) = yield self.auth.get_user_by_req(request) self.assertEquals(user.to_string(), self.test_user) def test_get_user_by_req_user_bad_token(self): @@ -86,7 +86,7 @@ class AuthTestCase(unittest.TestCase): request = Mock(args={}) request.args["access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = Mock(return_value=[""]) - (user, _) = yield self.auth.get_user_by_req(request) + (user, _, _) = yield self.auth.get_user_by_req(request) self.assertEquals(user.to_string(), self.test_user) def test_get_user_by_req_appservice_bad_token(self): @@ -121,7 +121,7 @@ class AuthTestCase(unittest.TestCase): request.args["access_token"] = [self.test_token] request.args["user_id"] = [masquerading_user_id] request.requestHeaders.getRawHeaders = Mock(return_value=[""]) - (user, _) = yield self.auth.get_user_by_req(request) + (user, _, _) = yield self.auth.get_user_by_req(request) self.assertEquals(user.to_string(), masquerading_user_id) def test_get_user_by_req_appservice_valid_token_bad_user_id(self): @@ -158,6 +158,25 @@ class AuthTestCase(unittest.TestCase): user = user_info["user"] self.assertEqual(UserID.from_string(user_id), user) + @defer.inlineCallbacks + def test_get_guest_user_from_macaroon(self): + user_id = "@baldrick:matrix.org" + macaroon = pymacaroons.Macaroon( + location=self.hs.config.server_name, + identifier="key", + key=self.hs.config.macaroon_secret_key) + macaroon.add_first_party_caveat("gen = 1") + macaroon.add_first_party_caveat("type = access") + macaroon.add_first_party_caveat("user_id = %s" % (user_id,)) + macaroon.add_first_party_caveat("guest = true") + serialized = macaroon.serialize() + + user_info = yield self.auth._get_user_from_macaroon(serialized) + user = user_info["user"] + is_guest = user_info["is_guest"] + self.assertEqual(UserID.from_string(user_id), user) + self.assertTrue(is_guest) + @defer.inlineCallbacks def test_get_user_from_macaroon_user_db_mismatch(self): self.store.get_user_by_access_token = Mock( diff --git a/tests/rest/client/v1/test_presence.py b/tests/rest/client/v1/test_presence.py index 0e3b922246..3e0f294630 100644 --- 
a/tests/rest/client/v1/test_presence.py +++ b/tests/rest/client/v1/test_presence.py @@ -86,10 +86,11 @@ class PresenceStateTestCase(unittest.TestCase): return defer.succeed([]) self.datastore.get_presence_list = get_presence_list - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(myid), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -173,10 +174,11 @@ class PresenceListTestCase(unittest.TestCase): ) self.datastore.has_presence_state = has_presence_state - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(myid), "token_id": 1, + "is_guest": False, } hs.handlers.room_member_handler = Mock( @@ -291,8 +293,8 @@ class PresenceEventStreamTestCase(unittest.TestCase): hs.get_clock().time_msec.return_value = 1000000 - def _get_user_by_req(req=None): - return (UserID.from_string(myid), "") + def _get_user_by_req(req=None, allow_guest=False): + return (UserID.from_string(myid), "", False) hs.get_v1auth().get_user_by_req = _get_user_by_req diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index 929e5e5dd4..adcc1d1969 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -52,8 +52,8 @@ class ProfileTestCase(unittest.TestCase): replication_layer=Mock(), ) - def _get_user_by_req(request=None): - return (UserID.from_string(myid), "") + def _get_user_by_req(request=None, allow_guest=False): + return (UserID.from_string(myid), "", False) hs.get_v1auth().get_user_by_req = _get_user_by_req diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 93896dd076..b43563fa4b 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -54,10 +54,11 @@ class RoomPermissionsTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -439,10 +440,11 @@ class RoomsMemberListTestCase(RestTestCase): self.auth_user_id = self.user_id - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -517,10 +519,11 @@ class RoomsCreateTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -608,10 +611,11 @@ class RoomTopicTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -713,10 +717,11 @@ class RoomMemberStateTestCase(RestTestCase): hs.get_handlers().federation_handler = 
Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -838,10 +843,11 @@ class RoomMessagesTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token @@ -933,10 +939,11 @@ class RoomInitialSyncTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 6395ce79db..8433585616 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -61,10 +61,11 @@ class RoomTypingTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.auth_user_id), "token_id": 1, + "is_guest": False, } hs.get_v1auth()._get_user_by_access_token = _get_user_by_access_token diff --git a/tests/rest/client/v2_alpha/__init__.py b/tests/rest/client/v2_alpha/__init__.py index f45570a1c0..fa9e17ec4f 100644 --- a/tests/rest/client/v2_alpha/__init__.py +++ b/tests/rest/client/v2_alpha/__init__.py @@ -43,10 +43,11 @@ class V2AlphaRestTestCase(unittest.TestCase): resource_for_federation=self.mock_resource, ) - def _get_user_by_access_token(token=None): + def _get_user_by_access_token(token=None, allow_guest=False): return { "user": UserID.from_string(self.USER_ID), "token_id": 1, + "is_guest": False, } hs.get_auth()._get_user_by_access_token = _get_user_by_access_token -- cgit 1.5.1 From 05c326d445fdd04ef06cb9a2a02cc0d6dde9935b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 4 Nov 2015 17:57:44 +0000 Subject: Implement order and group by --- synapse/handlers/search.py | 126 +++++++++++++++++++++++++++++++++++++++------ synapse/storage/search.py | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 17 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 2718e9482e..28f5300dc9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -46,15 +46,20 @@ class SearchHandler(BaseHandler): """ try: - search_term = content["search_categories"]["room_events"]["search_term"] - keys = content["search_categories"]["room_events"].get("keys", [ + room_cat = content["search_categories"]["room_events"] + search_term = room_cat["search_term"] + keys = room_cat.get("keys", [ "content.body", "content.name", "content.topic", ]) - filter_dict = content["search_categories"]["room_events"].get("filter", {}) - event_context = content["search_categories"]["room_events"].get( + filter_dict = room_cat.get("filter", {}) + order_by = room_cat.get("order_by", "rank") + event_context = room_cat.get( "event_context", None ) + group_by = room_cat.get("groupings", {}).get("group_by", {}) 
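For reference, a request body of the shape this hunk parses (the parsing continues just below with `group_keys`) might look like the following sketch; the field names come from the code in this patch, the values are invented:

    example_search_body = {
        "search_categories": {
            "room_events": {
                "search_term": "lunch",
                "keys": ["content.body"],       # defaults to body/name/topic
                "order_by": "recent",           # or "rank", the default
                "event_context": {"before_limit": 3},
                "groupings": {
                    "group_by": [
                        {"key": "room_id"},     # only "room_id" and "sender"
                        {"key": "sender"},      # pass the validation below
                    ],
                },
            },
        },
    }

    # group_by is a list of {"key": ...} dicts, so the extraction just below
    # yields: group_keys == ["room_id", "sender"]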
+ group_keys = [g["key"] for g in group_by] + if event_context is not None: before_limit = int(event_context.get( "before_limit", 5 @@ -65,6 +70,15 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") + if order_by not in ("rank", "recent"): + raise SynapseError(400, "Invalid order by: %r" % (order_by,)) + + if set(group_keys) - {"room_id", "sender"}: + raise SynapseError( + 400, + "Invalid group by keys: %r" % (set(group_keys) - {"room_id", "sender"},) + ) + search_filter = Filter(filter_dict) # TODO: Search through left rooms too @@ -77,18 +91,88 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) - rank_map, event_map, _ = yield self.store.search_msgs( - room_ids, search_term, keys - ) + rank_map = {} + allowed_events = [] + room_groups = {} + sender_group = {} - filtered_events = search_filter.filter(event_map.values()) + if order_by == "rank": + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) - allowed_events = yield self._filter_events_for_client( - user.to_string(), filtered_events - ) + filtered_events = search_filter.filter(event_map.values()) - allowed_events.sort(key=lambda e: -rank_map[e.event_id]) - allowed_events = allowed_events[:search_filter.limit()] + events = yield self._filter_events_for_client( + user.to_string(), filtered_events + ) + + events.sort(key=lambda e: -rank_map[e.event_id]) + allowed_events = events[:search_filter.limit()] + + for e in allowed_events: + rm = room_groups.setdefault(e.room_id, { + "results": [], + "order": rank_map[e.event_id], + }) + rm["results"].append(e.event_id) + + s = sender_group.setdefault(e.sender, { + "results": [], + "order": rank_map[e.event_id], + }) + s["results"].append(e.event_id) + + elif order_by == "recent": + for room_id in room_ids: + room_events = [] + pagination_token = None + i = 0 + + while len(room_events) < search_filter.limit() and i < 5: + i += 5 + r_map, event_map, pagination_token = yield self.store.search_room( + room_id, search_term, keys, search_filter.limit() * 2, + pagination_token=pagination_token, + ) + rank_map.update(r_map) + + filtered_events = search_filter.filter(event_map.values()) + + events = yield self._filter_events_for_client( + user.to_string(), filtered_events + ) + + room_events.extend(events) + room_events = room_events[:search_filter.limit()] + + if len(event_map) < search_filter.limit() * 2: + break + + if room_events: + group = room_groups.setdefault(room_id, {}) + if pagination_token: + group["next_batch"] = pagination_token + + group["results"] = [e.event_id for e in room_events] + group["order"] = max( + e.origin_server_ts/1000 for e in room_events + if hasattr(e, "origin_server_ts") + ) + + allowed_events.extend(room_events) + + # Normalize the group ranks + if room_groups: + mx = max(g["order"] for g in room_groups.values()) + mn = min(g["order"] for g in room_groups.values()) + + for g in room_groups.values(): + g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + + else: + # We should never get here due to the guard earlier. 
+            raise NotImplementedError()

         if event_context is not None:
             now_token = yield self.hs.get_event_sources().get_current_token()
@@ -144,11 +228,19 @@ class SearchHandler(BaseHandler):

         logger.info("Found %d results", len(results))

+        rooms_cat_res = {
+            "results": results,
+            "count": len(results)
+        }
+
+        if room_groups and "room_id" in group_keys:
+            rooms_cat_res.setdefault("groups", {})["room_id"] = room_groups
+
+        if sender_group and "sender" in group_keys:
+            rooms_cat_res.setdefault("groups", {})["sender"] = sender_group
+
         defer.returnValue({
             "search_categories": {
-                "room_events": {
-                    "results": results,
-                    "count": len(results)
-                }
+                "room_events": rooms_cat_res
             }
         })
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index cdf003502f..e37e56c1f2 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -20,6 +20,12 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine

 from collections import namedtuple

+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
 """The result of a search.

 Fields:
@@ -109,3 +115,93 @@ class SearchStore(SQLBaseStore):
             event_map,
             None
         ))
+
+    @defer.inlineCallbacks
+    def search_room(self, room_id, search_term, keys, limit, pagination_token=None):
+        """Performs a full text search over events with given keys.
+
+        Args:
+            room_id (str): The room_id to search in
+            search_term (str): Search term to search for
+            keys (list): List of keys to search in, currently supports
+                "content.body", "content.name", "content.topic"
+            limit (int): The maximum number of events to return
+            pagination_token (str): A pagination token previously returned
+
+        Returns:
+            SearchResult
+        """
+        clauses = []
+        args = [search_term, room_id]
+
+        local_clauses = []
+        for key in keys:
+            local_clauses.append("key = ?")
+            args.append(key)
+
+        clauses.append(
+            "(%s)" % (" OR ".join(local_clauses),)
+        )
+
+        if pagination_token:
+            topo, stream = pagination_token.split(",")
+            clauses.append(
+                "(topological_ordering < ?"
+                " OR (topological_ordering = ? AND stream_ordering < ?))"
+            )
+            args.extend([topo, topo, stream])
+
+        if isinstance(self.database_engine, PostgresEngine):
+            sql = (
+                "SELECT ts_rank_cd(vector, query) as rank,"
+                " topological_ordering, stream_ordering, room_id, event_id"
+                " FROM plainto_tsquery('english', ?) as query, event_search"
+                " NATURAL JOIN events"
+                " WHERE vector @@ query AND room_id = ?"
+            )
+        elif isinstance(self.database_engine, Sqlite3Engine):
+            sql = (
+                "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id,"
+                " topological_ordering, stream_ordering"
+                " FROM event_search"
+                " NATURAL JOIN events"
+                " WHERE value MATCH ? AND room_id = ?"
+            )
+        else:
+            # This should be unreachable.
+            raise Exception("Unrecognized database engine")
+
+        for clause in clauses:
+            sql += " AND " + clause
+
+        # We add an arbitrary limit here to ensure we don't try to pull the
+        # entire table from the database.
+        sql += " ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ?"
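The WHERE fragment built from `pagination_token` above implements keyset (seek) pagination over the `(topological_ordering, stream_ordering)` pair rather than OFFSET paging. A self-contained sketch of how such a token round-trips, assuming the "topo,stream" wire format used above (the helper names are invented for illustration):

    def make_pagination_token(topological_ordering, stream_ordering):
        # Mirrors the "%s,%s" encoding used when returning SearchResult.
        return "%s,%s" % (topological_ordering, stream_ordering)

    def parse_pagination_token(token):
        topo, stream = token.split(",")
        return int(topo), int(stream)

    def is_before(event_pair, token_pair):
        # An event precedes the token exactly when the two-clause SQL
        # condition above matches:
        #   topological_ordering < topo
        #   OR (topological_ordering = topo AND stream_ordering < stream)
        # Python tuple comparison encodes both clauses at once.
        return event_pair < token_pair

    assert is_before((4, 99), parse_pagination_token("5,10"))
    assert is_before((5, 9), parse_pagination_token("5,10"))
    assert not is_before((5, 10), parse_pagination_token("5,10"))

Because the query also orders by the same two columns descending, resuming from the last returned row's pair never skips or duplicates events. The patched function resumes below by binding `limit` and executing the query.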
+ + args.append(limit) + + results = yield self._execute( + "search_rooms", self.cursor_to_dict, sql, *args + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + pagination_token = None + if results: + topo = results[-1]["topological_ordering"] + stream = results[-1]["stream_ordering"] + pagination_token = "%s,%s" % (topo, stream) + + defer.returnValue(SearchResult( + { + r["event_id"]: r["rank"] + for r in results + if r["event_id"] in event_map + }, + event_map, + pagination_token + )) -- cgit 1.5.1 From ca2f90742d5606f8fc5b7ddd3dd7244c377c1df8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Thu, 5 Nov 2015 14:32:26 +0000 Subject: Open up /events to anonymous users for room events only Squash-merge of PR #345 from daniel/anonymousevents --- synapse/handlers/_base.py | 7 ++- synapse/handlers/events.py | 10 ++- synapse/handlers/message.py | 47 ++++++++++---- synapse/handlers/presence.py | 4 +- synapse/handlers/private_user_data.py | 2 +- synapse/handlers/receipts.py | 6 +- synapse/handlers/room.py | 11 +++- synapse/handlers/sync.py | 20 +++++- synapse/handlers/typing.py | 11 +--- synapse/notifier.py | 42 ++++++++++--- synapse/rest/client/v1/events.py | 13 +++- synapse/rest/client/v1/room.py | 6 +- synapse/storage/events.py | 2 + synapse/storage/room.py | 13 ++++ .../storage/schema/delta/25/history_visibility.sql | 26 ++++++++ synapse/storage/stream.py | 46 +++++++++++--- tests/handlers/test_presence.py | 71 ++++++++++++++++------ tests/handlers/test_typing.py | 30 +++++++-- tests/rest/client/v1/test_presence.py | 9 ++- tests/rest/client/v1/test_typing.py | 5 +- 20 files changed, 299 insertions(+), 82 deletions(-) create mode 100644 synapse/storage/schema/delta/25/history_visibility.sql (limited to 'synapse/storage') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 6873a4575d..a9e43052b7 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -47,7 +47,8 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, events, is_guest=False): + def _filter_events_for_client(self, user_id, events, is_guest=False, + require_all_visible_for_guests=True): # Assumes that user has at some point joined the room if not is_guest. def allowed(event, membership, visibility): @@ -100,7 +101,9 @@ class BaseHandler(object): if should_include: events_to_return.append(event) - if is_guest and len(events_to_return) < len(events): + if (require_all_visible_for_guests + and is_guest + and len(events_to_return) < len(events)): # This indicates that some events in the requested range were not # visible to guest users. To be safe, we reject the entire request, # so that we don't have to worry about interpreting visibility diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 53c8ca3a26..0e4c0d4d06 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -100,7 +100,7 @@ class EventStreamHandler(BaseHandler): @log_function def get_stream(self, auth_user_id, pagin_config, timeout=0, as_client_event=True, affect_presence=True, - only_room_events=False): + only_room_events=False, room_id=None, is_guest=False): """Fetches the events stream for a given user. If `only_room_events` is `True` only room events will be returned. @@ -119,9 +119,15 @@ class EventStreamHandler(BaseHandler): # thundering herds on restart. 
timeout = random.randint(int(timeout*0.9), int(timeout*1.1)) + if is_guest: + yield self.distributor.fire( + "user_joined_room", user=auth_user, room_id=room_id + ) + events, tokens = yield self.notifier.get_events_for( auth_user, pagin_config, timeout, - only_room_events=only_room_events + only_room_events=only_room_events, + is_guest=is_guest, guest_room_id=room_id ) time_now = self.clock.time_msec() diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 687e1527f7..654ecd2b37 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import SynapseError +from synapse.api.errors import SynapseError, AuthError, Codes from synapse.streams.config import PaginationConfig from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator @@ -229,7 +229,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def get_room_data(self, user_id=None, room_id=None, - event_type=None, state_key=""): + event_type=None, state_key="", is_guest=False): """ Get data from a room. Args: @@ -239,23 +239,42 @@ class MessageHandler(BaseHandler): Raises: SynapseError if something went wrong. """ - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id, is_guest + ) - if member_event.membership == Membership.JOIN: + if membership == Membership.JOIN: data = yield self.state_handler.get_current_state( room_id, event_type, state_key ) - elif member_event.membership == Membership.LEAVE: + elif membership == Membership.LEAVE: key = (event_type, state_key) room_state = yield self.store.get_state_for_events( - [member_event.event_id], [key] + [membership_event_id], [key] ) - data = room_state[member_event.event_id].get(key) + data = room_state[membership_event_id].get(key) defer.returnValue(data) @defer.inlineCallbacks - def get_state_events(self, user_id, room_id): + def _check_in_room_or_world_readable(self, room_id, user_id, is_guest): + if is_guest: + visibility = yield self.state_handler.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if visibility.content["history_visibility"] == "world_readable": + defer.returnValue((Membership.JOIN, None)) + return + else: + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) + else: + member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + defer.returnValue((member_event.membership, member_event.event_id)) + + @defer.inlineCallbacks + def get_state_events(self, user_id, room_id, is_guest=False): """Retrieve all state events for a given room. If the user is joined to the room then return the current state. If the user has left the room return the state events from when they left. @@ -266,15 +285,17 @@ class MessageHandler(BaseHandler): Returns: A list of dicts representing state events. 
[{}, {}, {}] """ - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id, is_guest + ) - if member_event.membership == Membership.JOIN: + if membership == Membership.JOIN: room_state = yield self.state_handler.get_current_state(room_id) - elif member_event.membership == Membership.LEAVE: + elif membership == Membership.LEAVE: room_state = yield self.store.get_state_for_events( - [member_event.event_id], None + [membership_event_id], None ) - room_state = room_state[member_event.event_id] + room_state = room_state[membership_event_id] now = self.clock.time_msec() defer.returnValue( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ce60642127..0b780cd528 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1142,8 +1142,9 @@ class PresenceEventSource(object): @defer.inlineCallbacks @log_function - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, user, from_key, room_ids=None, **kwargs): from_key = int(from_key) + room_ids = room_ids or [] presence = self.hs.get_handlers().presence_handler cachemap = presence._user_cachemap @@ -1161,7 +1162,6 @@ class PresenceEventSource(object): user_ids_to_check |= set( UserID.from_string(p["observed_user_id"]) for p in presence_list ) - room_ids = yield presence.get_joined_rooms_for_user(user) for room_id in set(room_ids) & set(presence._room_serials): if presence._room_serials[room_id] > from_key: joined = yield presence.get_joined_users_for_room_id(room_id) diff --git a/synapse/handlers/private_user_data.py b/synapse/handlers/private_user_data.py index 1778c71325..1abe45ed7b 100644 --- a/synapse/handlers/private_user_data.py +++ b/synapse/handlers/private_user_data.py @@ -24,7 +24,7 @@ class PrivateUserDataEventSource(object): return self.store.get_max_private_user_data_stream_id() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, user, from_key, **kwargs): user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a47ae3df42..973f4d5cae 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -164,17 +164,15 @@ class ReceiptEventSource(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, from_key, room_ids, **kwargs): from_key = int(from_key) to_key = yield self.get_current_key() if from_key == to_key: defer.returnValue(([], to_key)) - rooms = yield self.store.get_rooms_for_user(user.to_string()) - rooms = [room.room_id for room in rooms] events = yield self.store.get_linearized_receipts_for_rooms( - rooms, + room_ids, from_key=from_key, to_key=to_key, ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9184dcd048..736ffe9066 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -807,7 +807,14 @@ class RoomEventSource(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events( + self, + user, + from_key, + limit, + room_ids, + is_guest, + ): # We just ignore the key for now. 
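The signature changes above and below converge every event source on a single `get_new_events` interface. A minimal sketch of a source conforming to it, assuming the keyword arguments shown in these hunks (`user`, `from_key`, `limit`, `room_ids`, `is_guest`) and the usual `hs` homeserver plumbing; the class itself is invented for illustration:

    from twisted.internet import defer

    class ExampleEventSource(object):  # hypothetical event source
        def __init__(self, hs):
            self.store = hs.get_datastore()
            self._current_key = 0

        @defer.inlineCallbacks
        def get_new_events(self, user, from_key, limit, room_ids, is_guest,
                           **kwargs):
            # Sources are now handed the caller's room list instead of looking
            # up joined rooms themselves, which lets a guest be scoped to a
            # single world_readable room.
            events = []  # fetch events in room_ids newer than from_key here
            yield defer.succeed(None)  # placeholder for a real storage call
            defer.returnValue((events, self.get_current_key()))

        def get_current_key(self):
            return self._current_key

Moving the room lookup out of the individual sources means the notifier can decide whose rooms apply: a guest's single `guest_room_id`, or the full joined-room list for an ordinary user.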
to_key = yield self.get_current_key() @@ -828,6 +835,8 @@ class RoomEventSource(object): from_key=from_key, to_key=to_key, limit=limit, + room_ids=room_ids, + is_guest=is_guest, ) defer.returnValue((events, end_key)) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1c1ee34b1e..5294d96466 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -295,11 +295,16 @@ class SyncHandler(BaseHandler): typing_key = since_token.typing_key if since_token else "0" + rooms = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = [room.room_id for room in rooms] + typing_source = self.event_sources.sources["typing"] - typing, typing_key = yield typing_source.get_new_events_for_user( + typing, typing_key = yield typing_source.get_new_events( user=sync_config.user, from_key=typing_key, limit=sync_config.filter.ephemeral_limit(), + room_ids=room_ids, + is_guest=False, ) now_token = now_token.copy_and_replace("typing_key", typing_key) @@ -312,10 +317,13 @@ class SyncHandler(BaseHandler): receipt_key = since_token.receipt_key if since_token else "0" receipt_source = self.event_sources.sources["receipt"] - receipts, receipt_key = yield receipt_source.get_new_events_for_user( + receipts, receipt_key = yield receipt_source.get_new_events( user=sync_config.user, from_key=receipt_key, limit=sync_config.filter.ephemeral_limit(), + room_ids=room_ids, + # /sync doesn't support guest access, they can't get to this point in code + is_guest=False, ) now_token = now_token.copy_and_replace("receipt_key", receipt_key) @@ -360,11 +368,17 @@ class SyncHandler(BaseHandler): """ now_token = yield self.event_sources.get_current_token() + rooms = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = [room.room_id for room in rooms] + presence_source = self.event_sources.sources["presence"] - presence, presence_key = yield presence_source.get_new_events_for_user( + presence, presence_key = yield presence_source.get_new_events( user=sync_config.user, from_key=since_token.presence_key, limit=sync_config.filter.presence_limit(), + room_ids=room_ids, + # /sync doesn't support guest access, they can't get to this point in code + is_guest=False, ) now_token = now_token.copy_and_replace("presence_key", presence_key) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index d7096aab8c..2846f3e6e8 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -246,17 +246,12 @@ class TypingNotificationEventSource(object): }, } - @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, from_key, room_ids, **kwargs): from_key = int(from_key) handler = self.handler() - joined_room_ids = ( - yield self.room_member_handler().get_joined_rooms_for_user(user) - ) - events = [] - for room_id in joined_room_ids: + for room_id in room_ids: if room_id not in handler._room_serials: continue if handler._room_serials[room_id] <= from_key: @@ -264,7 +259,7 @@ class TypingNotificationEventSource(object): events.append(self._make_event_for(room_id)) - defer.returnValue((events, handler._latest_room_serial)) + return events, handler._latest_room_serial def get_current_key(self): return self.handler()._latest_room_serial diff --git a/synapse/notifier.py b/synapse/notifier.py index b69da63d43..56c4c863b5 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -269,7 +269,7 @@ class Notifier(object): logger.exception("Failed to notify listener") @defer.inlineCallbacks - def 
wait_for_events(self, user, timeout, callback, + def wait_for_events(self, user, timeout, callback, room_ids=None, from_token=StreamToken("s0", "0", "0", "0", "0")): """Wait until the callback returns a non empty response or the timeout fires. @@ -279,11 +279,12 @@ class Notifier(object): if user_stream is None: appservice = yield self.store.get_app_service_by_user_id(user) current_token = yield self.event_sources.get_current_token() - rooms = yield self.store.get_rooms_for_user(user) - rooms = [room.room_id for room in rooms] + if room_ids is None: + rooms = yield self.store.get_rooms_for_user(user) + room_ids = [room.room_id for room in rooms] user_stream = _NotifierUserStream( user=user, - rooms=rooms, + rooms=room_ids, appservice=appservice, current_token=current_token, time_now_ms=self.clock.time_msec(), @@ -329,7 +330,8 @@ class Notifier(object): @defer.inlineCallbacks def get_events_for(self, user, pagination_config, timeout, - only_room_events=False): + only_room_events=False, + is_guest=False, guest_room_id=None): """ For the given user and rooms, return any new events for them. If there are no new events wait for up to `timeout` milliseconds for any new events to happen before returning. @@ -342,6 +344,16 @@ class Notifier(object): limit = pagination_config.limit + room_ids = [] + if is_guest: + # TODO(daniel): Deal with non-room events too + only_room_events = True + if guest_room_id: + room_ids = [guest_room_id] + else: + rooms = yield self.store.get_rooms_for_user(user.to_string()) + room_ids = [room.room_id for room in rooms] + @defer.inlineCallbacks def check_for_updates(before_token, after_token): if not after_token.is_after(before_token): @@ -357,9 +369,23 @@ class Notifier(object): continue if only_room_events and name != "room": continue - new_events, new_key = yield source.get_new_events_for_user( - user, getattr(from_token, keyname), limit, + new_events, new_key = yield source.get_new_events( + user=user, + from_key=getattr(from_token, keyname), + limit=limit, + is_guest=is_guest, + room_ids=room_ids, ) + + if is_guest: + room_member_handler = self.hs.get_handlers().room_member_handler + new_events = yield room_member_handler._filter_events_for_client( + user.to_string(), + new_events, + is_guest=is_guest, + require_all_visible_for_guests=False + ) + events.extend(new_events) end_token = end_token.copy_and_replace(keyname, new_key) @@ -369,7 +395,7 @@ class Notifier(object): defer.returnValue(None) result = yield self.wait_for_events( - user, timeout, check_for_updates, from_token=from_token + user, timeout, check_for_updates, room_ids=room_ids, from_token=from_token ) if result is None: diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/v1/events.py index 4073b0d2d1..3e1750d1a1 100644 --- a/synapse/rest/client/v1/events.py +++ b/synapse/rest/client/v1/events.py @@ -34,7 +34,15 @@ class EventStreamRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - auth_user, _, _ = yield self.auth.get_user_by_req(request) + auth_user, _, is_guest = yield self.auth.get_user_by_req( + request, + allow_guest=True + ) + room_id = None + if is_guest: + if "room_id" not in request.args: + raise SynapseError(400, "Guest users must specify room_id param") + room_id = request.args["room_id"][0] try: handler = self.handlers.event_stream_handler pagin_config = PaginationConfig.from_request(request) @@ -49,7 +57,8 @@ class EventStreamRestServlet(ClientV1RestServlet): chunk = yield handler.get_stream( auth_user.to_string(), pagin_config, 
timeout=timeout, - as_client_event=as_client_event + as_client_event=as_client_event, affect_presence=(not is_guest), + room_id=room_id, is_guest=is_guest ) except: logger.exception("Event stream failed") diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 0876e593c5..afb802baec 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -125,7 +125,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id, event_type, state_key): - user, _, _ = yield self.auth.get_user_by_req(request) + user, _, is_guest = yield self.auth.get_user_by_req(request, allow_guest=True) msg_handler = self.handlers.message_handler data = yield msg_handler.get_room_data( @@ -133,6 +133,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): room_id=room_id, event_type=event_type, state_key=state_key, + is_guest=is_guest, ) if not data: @@ -348,12 +349,13 @@ class RoomStateRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): - user, _, _ = yield self.auth.get_user_by_req(request) + user, _, is_guest = yield self.auth.get_user_by_req(request, allow_guest=True) handler = self.handlers.message_handler # Get all the current state for this room events = yield handler.get_state_events( room_id=room_id, user_id=user.to_string(), + is_guest=is_guest, ) defer.returnValue((200, events)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e6c1abfc27..59c9987202 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -311,6 +311,8 @@ class EventsStore(SQLBaseStore): self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) + elif event.type == EventTypes.RoomHistoryVisibility: + self._store_history_visibility_txn(txn, event) self._store_room_members_txn( txn, diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 13441fcdce..1c79626736 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -202,6 +202,19 @@ class RoomStore(SQLBaseStore): txn, event, "content.body", event.content["body"] ) + def _store_history_visibility_txn(self, txn, event): + if hasattr(event, "content") and "history_visibility" in event.content: + sql = ( + "INSERT INTO history_visibility" + " (event_id, room_id, history_visibility)" + " VALUES (?, ?, ?)" + ) + txn.execute(sql, ( + event.event_id, + event.room_id, + event.content["history_visibility"] + )) + def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): sql = ( diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/schema/delta/25/history_visibility.sql new file mode 100644 index 0000000000..9f387ed69f --- /dev/null +++ b/synapse/storage/schema/delta/25/history_visibility.sql @@ -0,0 +1,26 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This is a manual index of history_visibility content of state events, + * so that we can join on them in SELECT statements. + */ +CREATE TABLE IF NOT EXISTS history_visibility( + id INTEGER PRIMARY KEY, + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + history_visibility TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index c728013f4c..be8ba76aae 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -158,13 +158,40 @@ class StreamStore(SQLBaseStore): defer.returnValue(results) @log_function - def get_room_events_stream(self, user_id, from_key, to_key, limit=0): - current_room_membership_sql = ( - "SELECT m.room_id FROM room_memberships as m " - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id AND c.state_key = m.user_id" - " WHERE m.user_id = ? AND m.membership = 'join'" - ) + def get_room_events_stream( + self, + user_id, + from_key, + to_key, + limit=0, + is_guest=False, + room_ids=None + ): + room_ids = room_ids or [] + room_ids = [r for r in room_ids] + if is_guest: + current_room_membership_sql = ( + "SELECT c.room_id FROM history_visibility AS h" + " INNER JOIN current_state_events AS c" + " ON h.event_id = c.event_id" + " WHERE c.room_id IN (%s) AND h.history_visibility = 'world_readable'" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + ) + current_room_membership_args = room_ids + else: + current_room_membership_sql = ( + "SELECT m.room_id FROM room_memberships as m " + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id AND c.state_key = m.user_id" + " WHERE m.user_id = ? AND m.membership = 'join'" + ) + current_room_membership_args = [user_id] + if room_ids: + current_room_membership_sql += " AND m.room_id in (%s)" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + current_room_membership_args = [user_id] + room_ids # We also want to get any membership events about that user, e.g. # invites or leave notifications. @@ -173,6 +200,7 @@ class StreamStore(SQLBaseStore): "INNER JOIN current_state_events as c ON m.event_id = c.event_id " "WHERE m.user_id = ? 
" ) + membership_args = [user_id] if limit: limit = max(limit, MAX_STREAM_SIZE) @@ -199,7 +227,9 @@ class StreamStore(SQLBaseStore): } def f(txn): - txn.execute(sql, (False, user_id, user_id, from_id.stream, to_id.stream,)) + args = ([False] + current_room_membership_args + membership_args + + [from_id.stream, to_id.stream]) + txn.execute(sql, args) rows = self.cursor_to_dict(txn) diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index 29372d488a..10d4482cce 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -650,9 +650,30 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): {"presence": ONLINE} ) + # Apple sees self-reflection even without room_id + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + ) + + self.assertEquals(self.event_source.get_current_key(), 1) + self.assertEquals(events, + [ + {"type": "m.presence", + "content": { + "user_id": "@apple:test", + "presence": ONLINE, + "last_active_ago": 0, + }}, + ], + msg="Presence event should be visible to self-reflection" + ) + # Apple sees self-reflection - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -684,8 +705,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Banana sees it because of presence subscription - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_banana, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_banana, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -702,8 +725,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Elderberry sees it because of same room - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_elderberry, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_elderberry, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -720,8 +745,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Durian is not in the room, should not see this event - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_durian, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_durian, + from_key=0, + room_ids=[], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -767,8 +794,9 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): "accepted": True}, ], presence) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 1, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=1, ) self.assertEquals(self.event_source.get_current_key(), 2) @@ -858,8 +886,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) ) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -905,8 +935,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.assertEquals(self.event_source.get_current_key(), 1) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + 
(events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id,] ) self.assertEquals(events, [ @@ -932,8 +964,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.assertEquals(self.event_source.get_current_key(), 2) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id,] ) self.assertEquals(events, [ @@ -966,8 +1000,9 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.room_members.append(self.u_clementine) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, ) self.assertEquals(self.event_source.get_current_key(), 1) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 41bb08b7ca..2d7ba43561 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -187,7 +187,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -250,7 +253,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0 + ) self.assertEquals( events[0], [ @@ -306,7 +312,10 @@ class TypingNotificationsTestCase(unittest.TestCase): yield put_json.await_calls() self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -337,7 +346,10 @@ class TypingNotificationsTestCase(unittest.TestCase): self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -356,7 +368,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 2) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 1, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=1, + ) self.assertEquals( events[0], [ @@ -383,7 +398,10 @@ class TypingNotificationsTestCase(unittest.TestCase): self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 3) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ diff --git a/tests/rest/client/v1/test_presence.py b/tests/rest/client/v1/test_presence.py index 3e0f294630..7f29d73d95 100644 --- a/tests/rest/client/v1/test_presence.py +++ b/tests/rest/client/v1/test_presence.py @@ -47,7 +47,14 @@ class NullSource(object): def __init__(self, hs): pass - def get_new_events_for_user(self, 
user, from_key, limit): + def get_new_events( + self, + user, + from_key, + room_ids=None, + limit=None, + is_guest=None + ): return defer.succeed(([], from_key)) def get_current_key(self, direction='f'): diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 8433585616..61b9cc743b 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -116,7 +116,10 @@ class RoomTypingTestCase(RestTestCase): self.assertEquals(200, code) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.user, 0, None) + events = yield self.event_source.get_new_events( + from_key=0, + room_ids=[self.room_id], + ) self.assertEquals( events[0], [ -- cgit 1.5.1 From 7301e05122e07f6513916e8a35bf05581de6521d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 14:34:37 +0000 Subject: Implement basic pagination for search results --- synapse/handlers/search.py | 78 +++++++++++++++++++++++++++++++++++------- synapse/rest/client/v1/room.py | 3 +- synapse/storage/search.py | 55 ++++++++++------------------- 3 files changed, 86 insertions(+), 50 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 28f5300dc9..696780f34e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -22,6 +22,8 @@ from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event +from unpaddedbase64 import decode_base64, encode_base64 + import logging @@ -34,17 +36,32 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, user, content): + def search(self, user, content, batch=None): """Performs a full text search for a user. Args: user (UserID) content (dict): Search parameters + batch (str): The next_batch parameter. Used for pagination. 
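The batch parameter above is an opaque token. A minimal sketch of how such a token round-trips, using the unpaddedbase64 helpers this handler imports (encode_batch and decode_batch are illustrative names, not functions added by this patch):

    from unpaddedbase64 import decode_base64, encode_base64

    def encode_batch(group, group_key, token):
        # Pack the grouping ("room_id"), the group key (a room ID) and the
        # per-group pagination token into one opaque client-facing string.
        return encode_base64("%s\n%s\n%s" % (group, group_key, token))

    def decode_batch(batch):
        # Unpacking raises ValueError unless the decoded token contains
        # exactly three newline-separated fields; the handler maps any
        # failure here to a 400 "Invalid batch" error.
        group, group_key, token = decode_base64(batch).split("\n")
        return group, group_key, token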
Returns: dict to be returned to the client with results of search """ + batch_group = None + batch_group_key = None + batch_token = None + if batch: + try: + b = decode_base64(batch) + batch_group, batch_group_key, batch_token = b.split("\n") + + assert batch_group is not None + assert batch_group_key is not None + assert batch_token is not None + except: + raise SynapseError(400, "Invalid batch") + try: room_cat = content["search_categories"]["room_events"] search_term = room_cat["search_term"] @@ -91,17 +108,25 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) + if batch_group == "room_id": + room_ids = room_ids & {batch_group_key} + rank_map = {} allowed_events = [] room_groups = {} sender_group = {} + global_next_batch = None if order_by == "rank": - rank_map, event_map, _ = yield self.store.search_msgs( + results = yield self.store.search_msgs( room_ids, search_term, keys ) - filtered_events = search_filter.filter(event_map.values()) + results_map = {r["event"].event_id: r for r in results} + + rank_map.update({r["event"].event_id: r["rank"] for r in results}) + + filtered_events = search_filter.filter([r["event"] for r in results]) events = yield self._filter_events_for_client( user.to_string(), filtered_events @@ -126,18 +151,26 @@ class SearchHandler(BaseHandler): elif order_by == "recent": for room_id in room_ids: room_events = [] - pagination_token = None + if batch_group == "room_id" and batch_group_key == room_id: + pagination_token = batch_token + else: + pagination_token = None i = 0 while len(room_events) < search_filter.limit() and i < 5: i += 5 - r_map, event_map, pagination_token = yield self.store.search_room( + results = yield self.store.search_room( room_id, search_term, keys, search_filter.limit() * 2, pagination_token=pagination_token, ) - rank_map.update(r_map) - filtered_events = search_filter.filter(event_map.values()) + results_map = {r["event"].event_id: r for r in results} + + rank_map.update({r["event"].event_id: r["rank"] for r in results}) + + filtered_events = search_filter.filter([ + r["event"] for r in results + ]) events = yield self._filter_events_for_client( user.to_string(), filtered_events @@ -146,13 +179,26 @@ class SearchHandler(BaseHandler): room_events.extend(events) room_events = room_events[:search_filter.limit()] - if len(event_map) < search_filter.limit() * 2: + if len(results) < search_filter.limit() * 2: + pagination_token = None break + else: + pagination_token = results[-1]["pagination_token"] + + if room_events: + res = results_map[room_events[-1].event_id] + pagination_token = res["pagination_token"] if room_events: group = room_groups.setdefault(room_id, {}) if pagination_token: - group["next_batch"] = pagination_token + next_batch = encode_base64("%s\n%s\n%s" % ( + "room_id", room_id, pagination_token + )) + group["next_batch"] = next_batch + + if batch_token: + global_next_batch = next_batch group["results"] = [e.event_id for e in room_events] group["order"] = max( @@ -164,11 +210,14 @@ class SearchHandler(BaseHandler): # Normalize the group ranks if room_groups: - mx = max(g["order"] for g in room_groups.values()) - mn = min(g["order"] for g in room_groups.values()) + if len(room_groups) > 1: + mx = max(g["order"] for g in room_groups.values()) + mn = min(g["order"] for g in room_groups.values()) - for g in room_groups.values(): - g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + for g in room_groups.values(): + g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + else: + 
room_groups.values()[0]["order"] = 1 else: # We should never get here due to the guard earlier. @@ -239,6 +288,9 @@ class SearchHandler(BaseHandler): if sender_group and "sender" in group_keys: rooms_cat_res.setdefault("groups", {})["sender"] = sender_group + if global_next_batch: + rooms_cat_res["next_batch"] = global_next_batch + defer.returnValue({ "search_categories": { "room_events": rooms_cat_res diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 2dcaee86cd..8e28f12d29 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -601,7 +601,8 @@ class SearchRestServlet(ClientV1RestServlet): content = _parse_json(request) - results = yield self.handlers.search_handler.search(auth_user, content) + batch = request.args.get("next_batch", [None])[0] + results = yield self.handlers.search_handler.search(auth_user, content, batch) defer.returnValue((200, results)) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e37e56c1f2..7342e7bae6 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,24 +18,12 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine -from collections import namedtuple - import logging logger = logging.getLogger(__name__) -"""The result of a search. - -Fields: - rank_map (dict): Mapping event_id -> rank - event_map (dict): Mapping event_id -> event - pagination_token (str): Pagination token -""" -SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) - - class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): @@ -48,7 +36,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - SearchResult + list of dicts """ clauses = [] args = [] @@ -106,15 +94,14 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(SearchResult( + defer.returnValue([ { - r["event_id"]: r["rank"] - for r in results - if r["event_id"] in event_map - }, - event_map, - None - )) + "event": event_map[r["event_id"]], + "rank": r["rank"], + } + for r in results + if r["event_id"] in event_map + ]) @defer.inlineCallbacks def search_room(self, room_id, search_term, keys, limit, pagination_token=None): @@ -128,7 +115,7 @@ class SearchStore(SQLBaseStore): pagination_token (str): A pagination token previously returned Returns: - SearchResult + list of dicts """ clauses = [] args = [search_term, room_id] @@ -190,18 +177,14 @@ class SearchStore(SQLBaseStore): for ev in events } - pagination_token = None - if results: - topo = results[-1]["topological_ordering"] - stream = results[-1]["stream_ordering"] - pagination_token = "%s,%s" % (topo, stream) - - defer.returnValue(SearchResult( + defer.returnValue([ { - r["event_id"]: r["rank"] - for r in results - if r["event_id"] in event_map - }, - event_map, - pagination_token - )) + "event": event_map[r["event_id"]], + "rank": r["rank"], + "pagination_token": "%s,%s" % ( + r["topological_ordering"], r["stream_ordering"] + ), + } + for r in results + if r["event_id"] in event_map + ]) -- cgit 1.5.1 From 3640ddfbf6641a79b486d3847ae739c974c62c7a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 16:10:54 +0000 Subject: Error handling --- synapse/storage/search.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 
7342e7bae6..3cea2011fa 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,6 +16,7 @@ from twisted.internet import defer from _base import SQLBaseStore +from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine import logging @@ -130,7 +131,13 @@ class SearchStore(SQLBaseStore): ) if pagination_token: - topo, stream = pagination_token.split(",") + try: + topo, stream = pagination_token.split(",") + topo = int(topo) + stream = int(stream) + except: + raise SynapseError(400, "Invalid pagination token") + clauses.append( "(topological_ordering < ?" " OR (topological_ordering = ? AND stream_ordering < ?))" -- cgit 1.5.1 From f2c4ee41b91f1828d516df871adcfbaed46d5407 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Fri, 6 Nov 2015 14:27:49 +0000 Subject: Remove accidentally added ID column --- synapse/storage/schema/delta/25/history_visibility.sql | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/schema/delta/25/history_visibility.sql index 9f387ed69f..532cb05151 100644 --- a/synapse/storage/schema/delta/25/history_visibility.sql +++ b/synapse/storage/schema/delta/25/history_visibility.sql @@ -18,7 +18,6 @@ * so that we can join on them in SELECT statements. */ CREATE TABLE IF NOT EXISTS history_visibility( - id INTEGER PRIMARY KEY, event_id TEXT NOT NULL, room_id TEXT NOT NULL, history_visibility TEXT NOT NULL, -- cgit 1.5.1 From 767c20a869933204b6e545c6f1495cd7cd298a87 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 6 Nov 2015 20:49:57 +0100 Subject: add a key existence check to tags_by_room to avoid /events 500'ing when testing against vector --- synapse/storage/tags.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 641ea250f0..73babd53d9 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -95,7 +95,8 @@ class TagsStore(SQLBaseStore): if room_ids: tags_by_room = yield self.get_tags_for_user(user_id) for room_id in room_ids: - results[room_id] = tags_by_room[room_id] + if room_id in tags_by_room: + results[room_id] = tags_by_room[room_id] defer.returnValue(results) -- cgit 1.5.1 From dd40fb68e455b9a736f1871a19119eb0391cd0d5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 8 Nov 2015 16:04:37 +0000 Subject: fix comedy important missing comma breaking recent-ordered FTS on sqlite --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 3cea2011fa..91a96d2a83 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -154,7 +154,7 @@ class SearchStore(SQLBaseStore): ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" + "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id," " topological_ordering, stream_ordering" " FROM event_search" " NATURAL JOIN events" -- cgit 1.5.1 From c4135d85e116cecbc119c1243911a0b60a6452d7 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 9 Nov 2015 14:52:18 +0000 Subject: SYN-513: Include updates for rooms that have had all their tags deleted --- synapse/handlers/sync.py | 2 +- synapse/storage/tags.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git 
a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5294d96466..ff766e3af5 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -272,7 +272,7 @@ class SyncHandler(BaseHandler): def private_user_data_for_room(self, room_id, tags_by_room): private_user_data = [] tags = tags_by_room.get(room_id) - if tags: + if tags is not None: private_user_data.append({ "type": "m.tag", "content": {"tags": tags}, diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 641ea250f0..bf695b7800 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -95,7 +95,7 @@ class TagsStore(SQLBaseStore): if room_ids: tags_by_room = yield self.get_tags_for_user(user_id) for room_id in room_ids: - results[room_id] = tags_by_room[room_id] + results[room_id] = tags_by_room.get(room_id, {}) defer.returnValue(results) -- cgit 1.5.1 From c6a01f2ed0da9f0db169307e7d1649021f0c2123 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 9 Nov 2015 14:37:28 +0000 Subject: Add storage module for tracking background updates. The progress for each background update is stored as a JSON blob in the database. Each background update is broken up into separate batches. The batch size is automatically tuned to try to avoid blocking single threaded databases for too long. --- synapse/storage/background_updates.py | 210 +++++++++++++++++++++ .../schema/delta/25/00background_updates.sql | 21 +++ 2 files changed, 231 insertions(+) create mode 100644 synapse/storage/background_updates.py create mode 100644 synapse/storage/schema/delta/25/00background_updates.sql (limited to 'synapse/storage') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py new file mode 100644 index 0000000000..32a233c213 --- /dev/null +++ b/synapse/storage/background_updates.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import SQLBaseStore + +from twisted.internet import defer + +import ujson as json +import logging + +logger = logging.getLogger(__name__) + + +class BackgroundUpdatePerformance(object): + """Tracks how long a background update is taking to update its items""" + + def __init__(self, name): + self.name = name + self.total_item_count = 0 + self.total_duration_ms = 0 + self.avg_item_count = 0 + self.avg_duration_ms = 0 + + def update(self, item_count, duration_ms): + """Update the stats after doing an update""" + self.total_item_count += item_count + self.total_duration_ms += duration_ms + + # Exponential moving averages for the number of items updated and + # the duration. + self.avg_item_count += 0.1 * (item_count - self.avg_item_count) + self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms) + + def average_duration_ms_per_item(self): + """An estimate of how long it takes to do a single update.
+ Returns: + A duration in ms as a float + """ + if self.total_item_count == 0: + return None + else: + # Use the exponential moving average so that we can adapt to + # changes in how long the update process takes. + return float(self.avg_duration_ms) / float(self.avg_item_count) + + +class BackgroundUpdateStore(SQLBaseStore): + """ Background updates are updates to the database that run in the + background. Each update processes a batch of data at once. We attempt to + limit the impact of each update by monitoring how long each batch takes to + process and autotuning the batch size. + """ + + MINIMUM_BACKGROUND_BATCH_SIZE = 100 + DEFAULT_BACKGROUND_BATCH_SIZE = 100 + + def __init__(self, hs): + super(BackgroundUpdateStore, self).__init__(hs) + self._background_update_performance = {} + self._background_update_queue = [] + self._background_update_handlers = {} + + @defer.inlineCallbacks + def do_background_update(self, desired_duration_ms): + """Does some amount of work on a background update + Args: + desired_duration_ms(float): How long we want to spend + updating. + Returns: + A deferred that completes once some amount of work is done. + The deferred will have a value of None if there is currently + no more work to do. + """ + if not self._background_update_queue: + updates = yield self._simple_select_list( + "background_updates", + keyvalues=None, + retcols=("update_name",), + ) + for update in updates: + self._background_update_queue.append(update['update_name']) + + if not self._background_update_queue: + defer.returnValue(None) + + update_name = self._background_update_queue.pop(0) + self._background_update_queue.append(update_name) + + update_handler = self._background_update_handlers[update_name] + + performance = self._background_update_performance.get(update_name) + + if performance is None: + performance = BackgroundUpdatePerformance(update_name) + self._background_update_performance[update_name] = performance + + duration_ms_per_item = performance.average_duration_ms_per_item() + + if duration_ms_per_item is not None: + batch_size = int(desired_duration_ms / duration_ms_per_item) + # Clamp the batch size so that we always make progress + batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) + else: + batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE + + progress_json = yield self._simple_select_one_onecol( + "background_updates", + keyvalues={"update_name": update_name}, + retcol="progress_json" + ) + + progress = json.loads(progress_json) + + time_start = self._clock.time_msec() + items_updated = yield update_handler(progress, batch_size) + time_stop = self._clock.time_msec() + + duration_ms = time_stop - time_start + + logger.info( + "Updating %. Updated %r items in %rms", + update_name, items_updated, duration_ms + ) + + performance.update(items_updated, duration_ms) + + defer.returnValue(len(self._background_update_performance)) + + def register_background_update_handler(self, update_name, update_handler): + """Register a handler for doing a background update. + + The handler should take two arguments: + + * A dict of the current progress + * An integer count of the number of items to update in this batch. + + The handler should return a deferred integer count of items updated. + The handler is responsible for updating the progress of the update. + + Args: + update_name(str): The name of the update that this code handles. + update_handler(function): The function that does the update.
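A sketch of what a conforming handler looks like under this contract. ExampleStore, "example_update" and the "last_id" progress field are hypothetical; the _background_update_* helpers are the ones defined in this module:

    from twisted.internet import defer

    class ExampleStore(BackgroundUpdateStore):
        EXAMPLE_UPDATE_NAME = "example_update"  # hypothetical update name

        def __init__(self, hs):
            super(ExampleStore, self).__init__(hs)
            self.register_background_update_handler(
                self.EXAMPLE_UPDATE_NAME, self._do_example_update
            )

        @defer.inlineCallbacks
        def _do_example_update(self, progress, batch_size):
            # Each update defines its own progress schema; "last_id" is a
            # hypothetical field for this sketch.
            last_id = progress.get("last_id", 0)

            def do_batch_txn(txn):
                # Process up to batch_size items with id > last_id here, then
                # persist the new progress inside the same transaction.
                rows_processed = 0  # placeholder for real batch work
                self._background_update_progress_txn(
                    txn, self.EXAMPLE_UPDATE_NAME, {"last_id": last_id}
                )
                return rows_processed

            num_rows = yield self.runInteraction("example_update", do_batch_txn)

            if not num_rows:
                # Nothing left to do: remove the update from the queue.
                yield self._end_background_update(self.EXAMPLE_UPDATE_NAME)

            defer.returnValue(num_rows)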
+ """ + self._background_update_handlers[update_name] = update_handler + + def start_background_update(self, update_name, progress): + """Starts a background update running. + + Args: + update_name: The update to set running. + progress: The initial state of the progress of the update. + + Returns: + A deferred that completes once the task has been added to the + queue. + """ + # Clear the background update queue so that we will pick up the new + # task on the next iteration of do_background_update. + self._background_update_queue = [] + progress_json = json.dumps(progress) + + return self._simple_insert( + "background_updates", + {"update_name": update_name, "progress_json": progress_json} + ) + + def _end_background_update(self, update_name): + """Removes a completed background update task from the queue. + + Args: + update_name(str): The name of the completed task to remove + Returns: + A deferred that completes once the task is removed. + """ + self._background_update_queue = [ + name for name in self._background_update_queue if name != update_name + ] + return self._simple_delete_one( + "background_updates", keyvalues={"update_name": update_name} + ) + + def _background_update_progress_txn(self, txn, update_name, progress): + """Update the progress of a background update + + Args: + txn(cursor): The transaction. + update_name(str): The name of the background update task + progress(dict): The progress of the update. + """ + + progress_json = json.dumps(progress) + + self._simple_update_one_txn( + txn, + "background_updates", + keyvalues={"update_name": update_name}, + updatevalues={"progress_json": progress_json}, + ) diff --git a/synapse/storage/schema/delta/25/00background_updates.sql b/synapse/storage/schema/delta/25/00background_updates.sql new file mode 100644 index 0000000000..41a9b59b1b --- /dev/null +++ b/synapse/storage/schema/delta/25/00background_updates.sql @@ -0,0 +1,21 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS background_updates( + update_name TEXT NOT NULL, -- The name of the background update. + progress_json TEXT NOT NULL, -- The current progress of the update as JSON. 
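To make the progress_json column concrete: each update serialises its own progress schema into this column. A sketch of the round trip, using the keys the event_search reindex later in this series stores (the values here are illustrative):

    import ujson as json

    progress = {
        "target_min_stream_id_inclusive": 0,
        "max_stream_id_exclusive": 100000,
        "rows_inserted": 0,
    }
    row = {"update_name": "event_search", "progress_json": json.dumps(progress)}
    # do_background_update() reads the row back and hands
    # json.loads(row["progress_json"]) to the registered handler.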
+ CONSTRAINT background_updates_uniqueness UNIQUE (update_name) +); -- cgit 1.5.1 From 2ede7aa8a1da20b735797cc9230da7bbeb55efc3 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 9 Nov 2015 19:29:32 +0000 Subject: Add background update task for reindexing event search --- synapse/storage/search.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 3cea2011fa..f7c269865d 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -15,7 +15,7 @@ from twisted.internet import defer -from _base import SQLBaseStore +from .background_updates import BackgroundUpdateStore from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine @@ -25,7 +25,101 @@ import logging logger = logging.getLogger(__name__) -class SearchStore(SQLBaseStore): +class SearchStore(BackgroundUpdateStore): + + EVENT_SEARCH_UPDATE_NAME = "event_search" + + + @defer.inlineCallbacks + def _background_reindex_search(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id"] + max_stream_id = progress["max_stream_id"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + TYPES = ["m.room.name", "m.room.message", "m.room.topic"] + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_id, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " AND (%s)" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) % (" OR ".join("type = '%s'" % TYPES),) + + txn.execute(sql, target_min_stream_id, max_stream_id, batch_size) + + rows = txn.fetch_all() + if not rows: + return None + + min_stream_id = rows[-1][0] + event_ids = [row[1] for row in rows] + + events = self._get_events_txn(txn, event_ids) + + event_search_rows = [] + for event in events: + try: + event_id = event.event_id + room_id = event.room_id + content = event.content + if event.type == "m.room.message": + key = "content.body" + value = content["body"] + elif event.type == "m.room.topic": + key = "content.topic" + value = content["topic"] + elif event.type == "m.room.name": + key = "content.name" + value = content["name"] + except Exception: + # If the event is missing a necessary field then + # skip over it. + continue + + event_search_rows.append((event_id, room_id, key, value)) + + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + else: + # This should be unreachable. 
+ raise Exception("Unrecognized database engine") + + for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): + clump = event_search_rows[index : index + INSERT_CLUMP_SIZE) + txn.execute_many(sql, clump) + + progress = { + "target_max_stream_id": target_min_stream_id, + "max_stream_id": min_stream_id, + "rows_inserted": rows_inserted + len(event_search_rows) + } + + self._background_update_progress_txn( + txn, self.EVENT_SEARCH_UPDATE_NAME, progress + ) + + return len(event_search_rows) + + result = yield self.runInteration( + self.EVENT_SEARCH_UPDATE_NAME, reindex_search_txn + ) + + if result is None: + yield _end_background_update(self.EVENT_SEARCH_UPDATE_NAME) + + defer.returnValue(result) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. -- cgit 1.5.1 From a412b9a465cb6a64f44e3656d8645977f43c275f Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 10 Nov 2015 15:50:58 +0000 Subject: Run the background updates when starting synapse. --- synapse/app/homeserver.py | 1 + synapse/storage/background_updates.py | 57 ++++++++++++++++++++++++++++++----- synapse/storage/search.py | 11 +++++-- synapse/util/__init__.py | 8 +++++ 4 files changed, 67 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a77535a4ee..cd7a52ec07 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -439,6 +439,7 @@ def setup(config_options): hs.get_pusherpool().start() hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() + hs.get_datastore().start_doing_background_updates() hs.get_replication_layer().start_get_pdu_cache() return hs diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 32a233c213..b6cdc6ec68 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -43,7 +43,7 @@ class BackgroundUpdatePerformance(object): self.avg_item_count += 0.1 * (item_count - self.avg_item_count) self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms) - def average_duration_ms_per_item(self): + def average_items_per_ms(self): """An estimate of how long it takes to do a single update. Returns: A duration in ms as a float @@ -53,7 +53,17 @@ class BackgroundUpdatePerformance(object): else: # Use the exponential moving average so that we can adapt to # changes in how long the update process takes. - return float(self.avg_duration_ms) / float(self.avg_item_count) + return float(self.avg_item_count) / float(self.avg_duration_ms) + + def total_items_per_ms(self): + """An estimate of how long it takes to do a single update. 
+ Returns: + A duration in ms as a float + """ + if self.total_item_count == 0: + return None + else: + return float(self.total_item_count) / float(self.total_duration_ms) class BackgroundUpdateStore(SQLBaseStore): @@ -65,12 +75,41 @@ class BackgroundUpdateStore(SQLBaseStore): MINIMUM_BACKGROUND_BATCH_SIZE = 100 DEFAULT_BACKGROUND_BATCH_SIZE = 100 + BACKGROUND_UPDATE_INTERVAL_MS = 1000 + BACKGROUND_UPDATE_DURATION_MS = 100 def __init__(self, hs): super(BackgroundUpdateStore, self).__init__(hs) self._background_update_performance = {} self._background_update_queue = [] self._background_update_handlers = {} + self._background_update_timer = None + + @defer.inlineCallbacks + def start_doing_background_updates(self): + while True: + if self._background_update_timer is not None: + return + + sleep = defer.Deferred() + self._background_update_timer = self._clock.call_later( + self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback + ) + try: + yield sleep + finally: + self._background_update_timer = None + + result = yield self.do_background_update( + self.BACKGROUND_UPDATE_DURATION_MS + ) + + if result is None: + logger.info( + "No more background updates to do." + " Unscheduling background update task." + ) + return @defer.inlineCallbacks def do_background_update(self, desired_duration_ms): @@ -106,10 +145,10 @@ class BackgroundUpdateStore(SQLBaseStore): performance = BackgroundUpdatePerformance(update_name) self._background_update_performance[update_name] = performance - duration_ms_per_item = performance.average_duration_ms_per_item() + items_per_ms = performance.average_items_per_ms() - if duration_ms_per_item is not None: - batch_size = int(desired_duration_ms / duration_ms_per_item) + if items_per_ms is not None: + batch_size = int(desired_duration_ms * items_per_ms) # Clamp the batch size so that we always make progress batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) else: @@ -130,8 +169,12 @@ class BackgroundUpdateStore(SQLBaseStore): duration_ms = time_stop - time_start logger.info( - "Updating %. Updated %r items in %rms", - update_name, items_updated, duration_ms + "Updating %. Updated %r items in %rms." + " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", + update_name, items_updated, duration_ms, + performance.total_items_per_ms(), + performance.average_items_per_ms(), + performance.total_item_count, ) performance.update(items_updated, duration_ms) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f7c269865d..d170c546b5 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -29,6 +29,11 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" + def __init__(self, hs): + super(SearchStore, self).__init__(hs) + self.register_background_update_handler( + self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search + ) @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): @@ -74,7 +79,7 @@ class SearchStore(BackgroundUpdateStore): elif event.type == "m.room.name": key = "content.name" value = content["name"] - except Exception: + except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. 
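The do_background_update() change above converts the measured update rate into the next batch size, clamped so progress is always made. A standalone sketch of that calculation (next_batch_size is an illustrative name; the constants mirror the ones defined in background_updates.py):

    MINIMUM_BACKGROUND_BATCH_SIZE = 100
    DEFAULT_BACKGROUND_BATCH_SIZE = 100
    BACKGROUND_UPDATE_DURATION_MS = 100

    def next_batch_size(items_per_ms):
        # With no measurements yet, fall back to the default; otherwise aim
        # to fill the desired duration at the observed rate, but never drop
        # below the minimum.
        if items_per_ms is None:
            return DEFAULT_BACKGROUND_BATCH_SIZE
        return max(
            int(BACKGROUND_UPDATE_DURATION_MS * items_per_ms),
            MINIMUM_BACKGROUND_BATCH_SIZE,
        )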
continue @@ -96,7 +101,7 @@ class SearchStore(BackgroundUpdateStore): raise Exception("Unrecognized database engine") for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index : index + INSERT_CLUMP_SIZE) + clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] txn.execute_many(sql, clump) progress = { @@ -116,7 +121,7 @@ class SearchStore(BackgroundUpdateStore): ) if result is None: - yield _end_background_update(self.EVENT_SEARCH_UPDATE_NAME) + yield self._end_background_update(self.EVENT_SEARCH_UPDATE_NAME) defer.returnValue(result) diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 1d123ccefc..d69c7cb991 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -53,6 +53,14 @@ class Clock(object): loop.stop() def call_later(self, delay, callback, *args, **kwargs): + """Call something later + + Args: + delay(float): How long to wait in seconds. + callback(function): Function to call + *args: Positional arguments to pass to function. + **kwargs: Keyword arguments to pass to function. + """ current_context = LoggingContext.current_context() def wrapped_callback(*args, **kwargs): -- cgit 1.5.1 From 90b503216c40974dc4925a9bbb673779a039143c Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 10 Nov 2015 16:20:13 +0000 Subject: Use a background task to update databases to use the full text search --- synapse/storage/schema/delta/25/fts.py | 100 ++++++++------------------------- synapse/storage/search.py | 8 +-- 2 files changed, 28 insertions(+), 80 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index b7cd0ce3b8..e408d36da0 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -22,7 +22,7 @@ import ujson logger = logging.getLogger(__name__) -POSTGRES_SQL = """ +POSTGRES_TABLE = """ CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, @@ -31,22 +31,6 @@ CREATE TABLE IF NOT EXISTS event_search ( vector tsvector ); -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); CREATE INDEX event_search_ev_ridx ON event_search(room_id); @@ -61,67 +45,31 @@ SQLITE_TABLE = ( def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) + for statement in get_statements(POSTGRES_TABLE.splitlines()): + cur.execute(statement) return if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return - - -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) - - -def run_sqlite_upgrade(cur): cur.execute(SQLITE_TABLE) + return - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE
rowid > ?" - " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - content = ev.get("content", {}) - body = content.get("body", None) - name = content.get("name", None) - topic = content.get("topic", None) - sender = ev.get("sender", None) - if ev["type"] == "m.room.message" and body: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.body", body - )) - if ev["type"] == "m.room.name" and name: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.name", name - )) - if ev["type"] == "m.room.topic" and topic: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.topic", topic - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, sender, key, value)" - " VALUES (?,?,?,?,?)", - rows - ) + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] + + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + + cur.execute( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)", ("event_search", progress_json) + ) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d170c546b5..3b19b118eb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -37,8 +37,8 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id"] - max_stream_id = progress["max_stream_id"] + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) INSERT_CLUMP_SIZE = 1000 @@ -105,8 +105,8 @@ class SearchStore(BackgroundUpdateStore): txn.execute_many(sql, clump) progress = { - "target_max_stream_id": target_min_stream_id, - "max_stream_id": min_stream_id, + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, "rows_inserted": rows_inserted + len(event_search_rows) } -- cgit 1.5.1 From cf437900e0c689aad40f3da89866cf84c0f7ef65 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 10 Nov 2015 17:10:27 +0000 Subject: Return world_readable and guest_can_join in /publicRooms --- synapse/storage/events.py | 2 + synapse/storage/room.py | 71 ++++++++++++++---------- synapse/storage/schema/delta/25/guest_access.sql | 25 +++++++++ tests/storage/test_room.py | 2 + 4 files changed, 71 insertions(+), 29 deletions(-) create mode 100644 synapse/storage/schema/delta/25/guest_access.sql (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 59c9987202..4a365ff639 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -313,6 +313,8 @@ class EventsStore(SQLBaseStore): self._store_redaction(txn, event) elif event.type == EventTypes.RoomHistoryVisibility: self._store_history_visibility_txn(txn, event) + elif event.type == EventTypes.GuestAccess: + self._store_guest_access_txn(txn, event) self._store_room_members_txn( txn, diff --git 
a/synapse/storage/room.py b/synapse/storage/room.py index 1c79626736..4f08df478c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -99,34 +99,39 @@ class RoomStore(SQLBaseStore): """ def f(txn): - topic_subquery = ( - "SELECT topics.event_id as event_id, " - "topics.room_id as room_id, topic " - "FROM topics " - "INNER JOIN current_state_events as c " - "ON c.event_id = topics.event_id " - ) - - name_subquery = ( - "SELECT room_names.event_id as event_id, " - "room_names.room_id as room_id, name " - "FROM room_names " - "INNER JOIN current_state_events as c " - "ON c.event_id = room_names.event_id " - ) + def subquery(table_name, column_name=None): + column_name = column_name or table_name + return ( + "SELECT %(table_name)s.event_id as event_id, " + "%(table_name)s.room_id as room_id, %(column_name)s " + "FROM %(table_name)s " + "INNER JOIN current_state_events as c " + "ON c.event_id = %(table_name)s.event_id " % { + "column_name": column_name, + "table_name": table_name, + } + ) - # We use non printing ascii character US (\x1F) as a separator sql = ( - "SELECT r.room_id, max(n.name), max(t.topic)" + "SELECT" + " r.room_id," + " max(n.name)," + " max(t.topic)," + " max(v.history_visibility)," + " max(g.guest_access)" " FROM rooms AS r" " LEFT JOIN (%(topic)s) AS t ON t.room_id = r.room_id" " LEFT JOIN (%(name)s) AS n ON n.room_id = r.room_id" + " LEFT JOIN (%(history_visibility)s) AS v ON v.room_id = r.room_id" + " LEFT JOIN (%(guest_access)s) AS g ON g.room_id = r.room_id" " WHERE r.is_public = ?" - " GROUP BY r.room_id" - ) % { - "topic": topic_subquery, - "name": name_subquery, - } + " GROUP BY r.room_id" % { + "topic": subquery("topics", "topic"), + "name": subquery("room_names", "name"), + "history_visibility": subquery("history_visibility"), + "guest_access": subquery("guest_access"), + } + ) txn.execute(sql, (is_public,)) @@ -156,10 +161,12 @@ class RoomStore(SQLBaseStore): "room_id": r[0], "name": r[1], "topic": r[2], - "aliases": r[3], + "world_readable": r[3] == "world_readable", + "guest_can_join": r[4] == "can_join", + "aliases": r[5], } for r in rows - if r[3] # We only return rooms that have at least one alias. + if r[5] # We only return rooms that have at least one alias. ] defer.returnValue(ret) @@ -203,16 +210,22 @@ class RoomStore(SQLBaseStore): ) def _store_history_visibility_txn(self, txn, event): - if hasattr(event, "content") and "history_visibility" in event.content: + self._store_content_index_txn(txn, event, "history_visibility") + + def _store_guest_access_txn(self, txn, event): + self._store_content_index_txn(txn, event, "guest_access") + + def _store_content_index_txn(self, txn, event, key): + if hasattr(event, "content") and key in event.content: sql = ( - "INSERT INTO history_visibility" - " (event_id, room_id, history_visibility)" - " VALUES (?, ?, ?)" + "INSERT INTO %(key)s" + " (event_id, room_id, %(key)s)" + " VALUES (?, ?, ?)" % {"key": key} ) txn.execute(sql, ( event.event_id, event.room_id, - event.content["history_visibility"] + event.content[key] )) def _store_event_search_txn(self, txn, event, key, value): diff --git a/synapse/storage/schema/delta/25/guest_access.sql b/synapse/storage/schema/delta/25/guest_access.sql new file mode 100644 index 0000000000..bdb90e7118 --- /dev/null +++ b/synapse/storage/schema/delta/25/guest_access.sql @@ -0,0 +1,25 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This is a manual index of guest_access content of state events, + * so that we can join on them in SELECT statements. + */ +CREATE TABLE IF NOT EXISTS guest_access( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + guest_access TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index caffce64e3..91c967548d 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -73,6 +73,8 @@ class RoomStoreTestCase(unittest.TestCase): "room_id": self.room.to_string(), "topic": None, "aliases": [self.alias.to_string()], + "world_readable": False, + "guest_can_join": False, }, rooms[0]) -- cgit 1.5.1 From 940a16119205115c02a3b23e9f3c67a08486bae0 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 11 Nov 2015 13:59:40 +0000 Subject: Fix the background update --- synapse/storage/background_updates.py | 13 ++++++++----- synapse/storage/schema/delta/25/fts.py | 7 +++---- synapse/storage/search.py | 16 ++++++++-------- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index b6cdc6ec68..45fccc2e5e 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -93,16 +93,19 @@ class BackgroundUpdateStore(SQLBaseStore): sleep = defer.Deferred() self._background_update_timer = self._clock.call_later( - self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback + self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback, None ) try: yield sleep finally: self._background_update_timer = None - result = yield self.do_background_update( - self.BACKGROUND_UPDATE_DURATION_MS - ) + try: + result = yield self.do_background_update( + self.BACKGROUND_UPDATE_DURATION_MS + ) + except: + logger.exception("Error doing update") if result is None: logger.info( @@ -169,7 +172,7 @@ class BackgroundUpdateStore(SQLBaseStore): duration_ms = time_stop - time_start logger.info( - "Updating %. Updated %r items in %rms." + "Updating %r. Updated %r items in %rms." 
" (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", update_name, items_updated, duration_ms, performance.total_items_per_ms(), diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index e408d36da0..ce92f59025 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -47,11 +47,10 @@ def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): for statement in get_statements(POSTGRES_TABLE.splitlines()): cur.execute(statement) - return - - if isinstance(database_engine, Sqlite3Engine): + elif isinstance(database_engine, Sqlite3Engine): cur.execute(SQLITE_TABLE) - return + else: + raise Exception("Unrecognized database engine") cur.execute("SELECT MIN(stream_ordering) FROM events") rows = cur.fetchall() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 3b19b118eb..3c0d671129 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -46,18 +46,18 @@ class SearchStore(BackgroundUpdateStore): def reindex_search_txn(txn): sql = ( - "SELECT stream_id, event_id FROM events" + "SELECT stream_ordering, event_id FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" " LIMIT ?" - ) % (" OR ".join("type = '%s'" % TYPES),) + ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),) - txn.execute(sql, target_min_stream_id, max_stream_id, batch_size) + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - rows = txn.fetch_all() + rows = txn.fetchall() if not rows: - return None + return 0 min_stream_id = rows[-1][0] event_ids = [row[1] for row in rows] @@ -102,7 +102,7 @@ class SearchStore(BackgroundUpdateStore): for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.execute_many(sql, clump) + txn.executemany(sql, clump) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -116,11 +116,11 @@ class SearchStore(BackgroundUpdateStore): return len(event_search_rows) - result = yield self.runInteration( + result = yield self.runInteraction( self.EVENT_SEARCH_UPDATE_NAME, reindex_search_txn ) - if result is None: + if not result: yield self._end_background_update(self.EVENT_SEARCH_UPDATE_NAME) defer.returnValue(result) -- cgit 1.5.1 From e1627388d15fac534e22b80c2f7cb1a4e1afacc3 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 11 Nov 2015 17:14:56 +0000 Subject: Fix param style to work on both sqlite and postgres --- synapse/storage/schema/delta/25/fts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index ce92f59025..5239d69073 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -68,7 +68,11 @@ def run_upgrade(cur, database_engine, *args, **kwargs): } progress_json = ujson.dumps(progress) - cur.execute( + sql = ( "INSERT into background_updates (update_name, progress_json)" - " VALUES (?, ?)", ("event_search", progress_json) + " VALUES (?, ?)" ) + + sql = database_engine.convert_param_style(sql) + + cur.execute(sql, ("event_search", progress_json)) -- cgit 1.5.1 From 39de87869c9ad966f382e597986e860d03f3fef2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 14:06:31 +0000 Subject: Fix bug where assumed dict was namedtuple --- synapse/storage/transactions.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 4e0d7c9774..ad099775eb 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -59,7 +59,7 @@ class TransactionStore(SQLBaseStore): allow_none=True, ) - if result and result.response_code: + if result and result["response_code"]: return result["response_code"], result["response_json"] else: return None -- cgit 1.5.1 From 14a9d805b959b5d2b26fea0d57794cb3ceb28958 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 14:07:25 +0000 Subject: Use a (hopefully) more efficient SQL query for doing recency based room search --- synapse/storage/search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 2e88c51ad0..eac265b543 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -253,11 +253,13 @@ class SearchStore(BackgroundUpdateStore): ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id," + "SELECT rank(matchinfo) as rank, room_id, event_id," " topological_ordering, stream_ordering" - " FROM event_search" - " NATURAL JOIN events" - " WHERE value MATCH ? AND room_id = ?" + " FROM (SELECT event_id, matchinfo(event_search) FROM event_search" + " WHERE value MATCH" + " )" + " CROSS JOIN events USING (event_id)" + " WHERE room_id = ?" ) else: # This should be unreachable. -- cgit 1.5.1 From 320408ef47eb373c2b8edad7ab3d08c3fa0cb459 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 15:09:45 +0000 Subject: Fix SQL syntax --- synapse/storage/search.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index eac265b543..e1911e2480 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -255,8 +255,9 @@ class SearchStore(BackgroundUpdateStore): sql = ( "SELECT rank(matchinfo) as rank, room_id, event_id," " topological_ordering, stream_ordering" - " FROM (SELECT event_id, matchinfo(event_search) FROM event_search" - " WHERE value MATCH" + " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo" + " FROM event_search" + " WHERE value MATCH ?" " )" " CROSS JOIN events USING (event_id)" " WHERE room_id = ?" -- cgit 1.5.1 From 764e79d0514a0cce9dc456df0355108d40dbeda8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 15:19:56 +0000 Subject: Comment --- synapse/storage/search.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e1911e2480..0b00ddb9db 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -252,6 +252,8 @@ class SearchStore(BackgroundUpdateStore): " WHERE vector @@ query AND room_id = ?" ) elif isinstance(self.database_engine, Sqlite3Engine): + # We use CROSS JOIN here to ensure we use the right indexes. 
+ # https://sqlite.org/optoverview.html#crossjoin sql = ( "SELECT rank(matchinfo) as rank, room_id, event_id," " topological_ordering, stream_ordering" -- cgit 1.5.1 From 8fd8e72cec8df94403441664d8eecc25fa8d363f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 15:33:47 +0000 Subject: Expand comment --- synapse/storage/search.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 0b00ddb9db..dcc5ac65a3 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -254,6 +254,12 @@ class SearchStore(BackgroundUpdateStore): elif isinstance(self.database_engine, Sqlite3Engine): # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin + # + # We want to use the full text search index on event_search to + # extract all possible matches first, then lookup those matches + # in the events table to get the topological ordering. We need + # to use the indexes in this order because sqlite refuses to + # MATCH unless it uses the full text search index sql = ( "SELECT rank(matchinfo) as rank, room_id, event_id," " topological_ordering, stream_ordering" -- cgit 1.5.1 From 3de46c7755ac5e061fc13fb916c13b841b65f2b5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2015 15:36:43 +0000 Subject: Trailing whitespace --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dcc5ac65a3..380270b009 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -258,7 +258,7 @@ class SearchStore(BackgroundUpdateStore): # We want to use the full text search index on event_search to # extract all possible matches first, then lookup those matches # in the events table to get the topological ordering. We need - # to use the indexes in this order because sqlite refuses to + # to use the indexes in this order because sqlite refuses to # MATCH unless it uses the full text search index sql = ( "SELECT rank(matchinfo) as rank, room_id, event_id," -- cgit 1.5.1 From fddedd51d9ca44f385d412e8c2e3e8d99325ba67 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 10 Nov 2015 18:27:23 +0000 Subject: Fix a few race conditions in the state calculation Be a bit more careful about how we calculate the state to be returned by /sync. In a few places, it was possible for /sync to return slightly later state than that represented by the next_batch token and the timeline. In particular, the following cases were susceptible: * On a full state sync, for an active room * During a per-room incremental sync with a timeline gap * When the user has just joined a room. (Refactor check_joined_room to make it less magical) Also, use store.get_state_for_events() (and thus the existing stategroups) to calculate the state corresponding to a particular sync position, rather than state_handler.compute_event_context(), which recalculates from first principles (and tends to miss some state). 
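The subtlety the commit message describes: the state stored for an event is the state before that event, so deriving state at a stream position from the last event must fold the event itself back in when it is a state event. A minimal sketch of that rule (state_after_event mirrors the new get_state_after_event helper added below):

    def state_after_event(state_before, event):
        # state_before maps (type, state_key) -> event, as returned by
        # store.get_state_for_event(); a state event replaces its own entry.
        state = dict(state_before)
        if event.is_state():
            state[(event.type, event.state_key)] = event
        return state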
Merged from PR https://github.com/matrix-org/synapse/pull/372 --- synapse/handlers/sync.py | 123 ++++++++++++++++++++++++----------------------- synapse/storage/state.py | 14 ++++++ 2 files changed, 77 insertions(+), 60 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8b154fa7e7..6dc9d0fb92 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -254,9 +254,7 @@ class SyncHandler(BaseHandler): room_id, sync_config, now_token, since_token=timeline_since_token ) - current_state = yield self.state_handler.get_current_state( - room_id - ) + current_state = yield self.get_state_at(room_id, now_token) defer.returnValue(JoinedSyncResult( room_id=room_id, @@ -353,14 +351,12 @@ class SyncHandler(BaseHandler): room_id, sync_config, leave_token, since_token=timeline_since_token ) - leave_state = yield self.store.get_state_for_events( - [leave_event_id], None - ) + leave_state = yield self.store.get_state_for_event(leave_event_id) defer.returnValue(ArchivedSyncResult( room_id=room_id, timeline=batch, - state=leave_state[leave_event_id], + state=leave_state, private_user_data=self.private_user_data_for_room( room_id, tags_by_room ), @@ -424,6 +420,9 @@ class SyncHandler(BaseHandler): if len(room_events) <= timeline_limit: # There is no gap in any of the rooms. Therefore we can just # partition the new events by room and return them. + logger.debug("Got %i events for incremental sync - not limited", + len(room_events)) + invite_events = [] leave_events = [] events_by_room_id = {} @@ -439,9 +438,11 @@ class SyncHandler(BaseHandler): for room_id in joined_room_ids: recents = events_by_room_id.get(room_id, []) + logger.debug("Events for room %s: %r", room_id, recents) state = { (event.type, event.state_key): event for event in recents if event.is_state()} + limited = False if recents: prev_batch = now_token.copy_and_replace( @@ -450,9 +451,13 @@ class SyncHandler(BaseHandler): else: prev_batch = now_token - state, limited = yield self.check_joined_room( - sync_config, room_id, state - ) + just_joined = yield self.check_joined_room(sync_config, state) + if just_joined: + logger.debug("User has just joined %s: needs full state", + room_id) + state = yield self.get_state_at(room_id, now_token) + # the timeline is inherently limited if we've just joined + limited = True room_sync = JoinedSyncResult( room_id=room_id, @@ -467,10 +472,15 @@ class SyncHandler(BaseHandler): room_id, tags_by_room ), ) + logger.debug("Result for room %s: %r", room_id, room_sync) + if room_sync: joined.append(room_sync) else: + logger.debug("Got %i events for incremental sync - hit limit", + len(room_events)) + invite_events = yield self.store.get_invites_for_user( sync_config.user.to_string() ) @@ -563,6 +573,8 @@ class SyncHandler(BaseHandler): Returns: A Deferred JoinedSyncResult """ + logger.debug("Doing incremental sync for room %s between %s and %s", + room_id, since_token, now_token) # TODO(mjark): Check for redactions we might have missed. 
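For reference, the refactored check_joined_room used above reduces to a membership test on the state delta; a sketch of its logic, assuming the usual synapse.api.constants imports:

    from synapse.api.constants import EventTypes, Membership

    def check_joined_room(sync_config, state_delta):
        # True if the delta contains the syncing user's own join, in which
        # case the caller fetches the full room state instead of the delta.
        join_event = state_delta.get(
            (EventTypes.Member, sync_config.user.to_string()), None
        )
        return (
            join_event is not None
            and join_event.content["membership"] == Membership.JOIN
        )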
@@ -572,30 +584,26 @@ class SyncHandler(BaseHandler):
 
         logging.debug("Recents %r", batch)
 
-        # TODO(mjark): This seems racy since this isn't being passed a
-        # token to indicate what point in the stream this is
-        current_state = yield self.state_handler.get_current_state(
-            room_id
-        )
+        current_state = yield self.get_state_at(room_id, now_token)
 
-        state_at_previous_sync = yield self.get_state_at_previous_sync(
-            room_id, since_token=since_token
+        state_at_previous_sync = yield self.get_state_at(
+            room_id, stream_position=since_token
         )
 
-        state_events_delta = yield self.compute_state_delta(
+        state = yield self.compute_state_delta(
             since_token=since_token,
             previous_state=state_at_previous_sync,
             current_state=current_state,
         )
 
-        state_events_delta, _ = yield self.check_joined_room(
-            sync_config, room_id, state_events_delta
-        )
+        just_joined = yield self.check_joined_room(sync_config, state)
+        if just_joined:
+            state = yield self.get_state_at(room_id, now_token)
 
         room_sync = JoinedSyncResult(
             room_id=room_id,
             timeline=batch,
-            state=state_events_delta,
+            state=state,
             ephemeral=ephemeral_by_room.get(room_id, []),
             private_user_data=self.private_user_data_for_room(
                 room_id, tags_by_room
@@ -627,16 +635,12 @@ class SyncHandler(BaseHandler):
 
         logging.debug("Recents %r", batch)
 
-        # TODO(mjark): This seems racy since this isn't being passed a
-        # token to indicate what point in the stream this is
-        leave_state = yield self.store.get_state_for_events(
-            [leave_event.event_id], None
+        state_events_at_leave = yield self.store.get_state_for_event(
+            leave_event.event_id
         )
 
-        state_events_at_leave = leave_state[leave_event.event_id]
-
-        state_at_previous_sync = yield self.get_state_at_previous_sync(
-            leave_event.room_id, since_token=since_token
+        state_at_previous_sync = yield self.get_state_at(
+            leave_event.room_id, stream_position=since_token
         )
 
         state_events_delta = yield self.compute_state_delta(
@@ -659,26 +663,36 @@ class SyncHandler(BaseHandler):
         defer.returnValue(room_sync)
 
     @defer.inlineCallbacks
-    def get_state_at_previous_sync(self, room_id, since_token):
-        """ Get the room state at the previous sync the client made.
-        Returns:
-            A Deferred map from ((type, state_key)->Event)
+    def get_state_after_event(self, event):
+        """
+        Get the room state after the given event
+
+        :param synapse.events.EventBase event: event of interest
+        :return: A Deferred map from ((type, state_key)->Event)
+        """
+        state = yield self.store.get_state_for_event(event.event_id)
+        if event.is_state():
+            state = state.copy()
+            state[(event.type, event.state_key)] = event
+        defer.returnValue(state)
+
+    @defer.inlineCallbacks
+    def get_state_at(self, room_id, stream_position):
+        """ Get the room state at a particular stream position
+        :param str room_id: room for which to get state
+        :param StreamToken stream_position: point at which to get state
+        :returns: A Deferred map from ((type, state_key)->Event)
         """
         last_events, token = yield self.store.get_recent_events_for_room(
-            room_id, end_token=since_token.room_key, limit=1,
+            room_id, end_token=stream_position.room_key, limit=1,
         )
 
         if last_events:
-            last_event = last_events[0]
-            last_context = yield self.state_handler.compute_event_context(
-                last_event
-            )
-            if last_event.is_state():
-                state = last_context.current_state.copy()
-                state[(last_event.type, last_event.state_key)] = last_event
-            else:
-                state = last_context.current_state
+            last_event = last_events[-1]
+            state = yield self.get_state_after_event(last_event)
+        else:
+            # no events in this room - so presumably no state
             state = {}
 
         defer.returnValue(state)
@@ -706,31 +720,20 @@ class SyncHandler(BaseHandler):
                 state_delta[key] = event
         return state_delta
 
-    @defer.inlineCallbacks
-    def check_joined_room(self, sync_config, room_id, state_delta):
+    def check_joined_room(self, sync_config, state_delta):
         """
-        Check if the user has just joined the given room. If so, return the
-        full state for the room, instead of the delta since the last sync.
+        Check if the user has just joined the given room (so should
+        be given the full state)
 
         :param sync_config:
-        :param room_id:
         :param dict[(str,str), synapse.events.FrozenEvent] state_delta: the
            difference in state since the last sync
 
-        :returns A deferred Tuple (state_delta, limited)
+        :returns: True if the user has just joined the room
         """
-        joined = False
-        limited = False
-
         join_event = state_delta.get((
             EventTypes.Member, sync_config.user.to_string()), None)
         if join_event is not None:
             if join_event.content["membership"] == Membership.JOIN:
-                joined = True
-
-        if joined:
-            state_delta = yield self.state_handler.get_current_state(room_id)
-            # the timeline is inherently limited if we've just joined
-            limited = True
-
-        defer.returnValue((state_delta, limited))
+                return True
+        return False
 
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index acfb322a53..80e9b63f50 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -237,6 +237,20 @@ class StateStore(SQLBaseStore):
 
         defer.returnValue({event: event_to_state[event] for event in event_ids})
 
+    @defer.inlineCallbacks
+    def get_state_for_event(self, event_id, types=None):
+        """
+        Get the state dict corresponding to a particular event
+
+        :param str event_id: event whose state should be returned
+        :param list[(str, str)]|None types: List of (type, state_key) tuples
+            which are used to filter the state fetched. May be None, which
+            matches any key
+        :return: a deferred dict from (type, state_key) -> state_event
+        """
+        state_map = yield self.get_state_for_events([event_id], types)
+        defer.returnValue(state_map[event_id])
+
     @cached(num_args=2, lru=True, max_entries=10000)
     def _get_state_group_for_event(self, room_id, event_id):
         return self._simple_select_one_onecol(
--
cgit 1.5.1


From e4d622aaaf0df503f942d016a5bf798dd52899d1 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Tue, 10 Nov 2015 18:29:25 +0000
Subject: Implementation of state rollback in /sync

Implementation of SPEC-254: roll back the state dictionary to how it
looked at the start of the timeline.

Merged PR https://github.com/matrix-org/synapse/pull/373
---
 synapse/rest/client/v2_alpha/sync.py | 67 ++++++++++++++++++++++++++++++++++--
 synapse/storage/events.py            |  6 ++--
 2 files changed, 69 insertions(+), 4 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index 272a00bc85..efd8281558 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -20,6 +20,7 @@ from synapse.http.servlet import (
 )
 from synapse.handlers.sync import SyncConfig
 from synapse.types import StreamToken
+from synapse.events import FrozenEvent
 from synapse.events.utils import (
     serialize_event, format_event_for_client_v2_without_event_id,
 )
@@ -256,7 +257,13 @@ class SyncRestServlet(RestServlet):
         :rtype: dict[str, object]
         """
         event_map = {}
-        state_events = filter.filter_room_state(room.state.values())
+        state_dict = room.state
+        timeline_events = filter.filter_room_timeline(room.timeline.events)
+
+        state_dict = SyncRestServlet._rollback_state_for_timeline(
+            state_dict, timeline_events)
+
+        state_events = filter.filter_room_state(state_dict.values())
         state_event_ids = []
         for event in state_events:
             # TODO(mjark): Respect formatting requirements in the filter.
@@ -266,7 +273,6 @@ class SyncRestServlet(RestServlet):
             )
             state_event_ids.append(event.event_id)
 
-        timeline_events = filter.filter_room_timeline(room.timeline.events)
         timeline_event_ids = []
         for event in timeline_events:
             # TODO(mjark): Respect formatting requirements in the filter.
@@ -297,6 +303,63 @@ class SyncRestServlet(RestServlet):
 
         return result
 
+    @staticmethod
+    def _rollback_state_for_timeline(state, timeline):
+        """
+        Wind the state dictionary backwards, so that it represents the
+        state at the start of the timeline, rather than at the end.
+
+        :param dict[(str, str), synapse.events.EventBase] state: the
+            state dictionary. Will be updated to the state before the timeline.
+        :param list[synapse.events.EventBase] timeline: the event timeline
+        :return: updated state dictionary
+        """
+        logger.debug("Processing state dict %r; timeline %r", state,
+                     [e.get_dict() for e in timeline])
+
+        result = state.copy()
+
+        for timeline_event in reversed(timeline):
+            if not timeline_event.is_state():
+                continue
+
+            event_key = (timeline_event.type, timeline_event.state_key)
+
+            logger.debug("Considering %s for removal", event_key)
+
+            state_event = result.get(event_key)
+            if (state_event is None or
+                    state_event.event_id != timeline_event.event_id):
+                # the event in the timeline isn't present in the state
+                # dictionary.
+                #
+                # the most likely cause for this is that there was a fork in
+                # the event graph, and the state is no longer valid. Really,
+                # the event shouldn't be in the timeline. We're going to ignore
+                # it for now, however.
+                logger.warn("Found state event %r in timeline which doesn't "
+                            "match state dictionary", timeline_event)
+                continue
+
+            prev_event_id = timeline_event.unsigned.get("replaces_state", None)
+            logger.debug("Replacing %s with %s in state dict",
+                         timeline_event.event_id, prev_event_id)
+
+            if prev_event_id is None:
+                del result[event_key]
+            else:
+                result[event_key] = FrozenEvent({
+                    "type": timeline_event.type,
+                    "state_key": timeline_event.state_key,
+                    "content": timeline_event.unsigned['prev_content'],
+                    "sender": timeline_event.unsigned['prev_sender'],
+                    "event_id": prev_event_id,
+                    "room_id": timeline_event.room_id,
+                })
+            logger.debug("New value: %r", result.get(event_key))
+
+        return result
+
 
 def register_servlets(hs, http_server):
     SyncRestServlet(hs).register(http_server)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 4a365ff639..5d35ca90b9 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -831,7 +831,8 @@ class EventsStore(SQLBaseStore):
                 allow_none=True,
             )
             if prev:
-                ev.unsigned["prev_content"] = prev.get_dict()["content"]
+                ev.unsigned["prev_content"] = prev.content
+                ev.unsigned["prev_sender"] = prev.sender
 
             self._get_event_cache.prefill(
                 (ev.event_id, check_redacted, get_prev_content), ev
@@ -888,7 +889,8 @@ class EventsStore(SQLBaseStore):
                 get_prev_content=False,
             )
             if prev:
-                ev.unsigned["prev_content"] = prev.get_dict()["content"]
+                ev.unsigned["prev_content"] = prev.content
+                ev.unsigned["prev_sender"] = prev.sender
 
             self._get_event_cache.prefill(
                 (ev.event_id, check_redacted, get_prev_content), ev
--
cgit 1.5.1
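[Editor's aside, not part of the patches: a condensed, standalone sketch of the SPEC-254 rollback implemented by `_rollback_state_for_timeline` above. `rollback_state` and `StubEvent` are illustrative names invented here; the real code rebuilds a `FrozenEvent`, and depends on the `prev_content`/`prev_sender` values stashed into `unsigned` by the events.py change.]

    from collections import namedtuple

    # Minimal stand-in for the FrozenEvent the real code constructs.
    StubEvent = namedtuple(
        "StubEvent",
        ["event_id", "type", "state_key", "content", "sender", "room_id"],
    )

    def rollback_state(state, timeline):
        """Wind `state` back so it describes the start of `timeline`."""
        result = dict(state)
        for event in reversed(timeline):
            if not event.is_state():
                continue
            key = (event.type, event.state_key)
            current = result.get(key)
            if current is None or current.event_id != event.event_id:
                # fork in the event graph: the timeline event never made
                # it into the state dict, so there is nothing to wind back
                continue
            prev_event_id = event.unsigned.get("replaces_state")
            if prev_event_id is None:
                # this event introduced the (type, state_key) pair, so the
                # pair was absent at the start of the timeline
                del result[key]
            else:
                # substitute the event this one replaced, rebuilt from the
                # prev_content/prev_sender stashed in `unsigned`
                result[key] = StubEvent(
                    event_id=prev_event_id,
                    type=event.type,
                    state_key=event.state_key,
                    content=event.unsigned["prev_content"],
                    sender=event.unsigned["prev_sender"],
                    room_id=event.room_id,
                )
        return result

Walking the timeline in reverse is what lets one `replaces_state` hop per event unwind several successive changes to the same (type, state_key) pair.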