summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2017-05-31 11:51:01 +0100
committer Erik Johnston <erik@matrix.org> 2017-05-31 11:51:01 +0100
commit eeb2f9e546060ca9f2ef7260220b51d85d9b0d92 (patch)
tree c0b211afe6a1509e02ff2667a44cd92542fb50ef
parent Merge pull request #2251 from matrix-org/erikj/current_state_delta_stream (diff)
download synapse-eeb2f9e546060ca9f2ef7260220b51d85d9b0d92.tar.xz
Add user_directory to database
-rw-r--r-- synapse/handlers/user_directory.py 218
-rw-r--r-- synapse/notifier.py 6
-rw-r--r-- synapse/server.py 5
-rw-r--r-- synapse/storage/__init__.py 2
-rw-r--r-- synapse/storage/schema/delta/42/user_dir.py 69
-rw-r--r-- synapse/storage/user_directory.py 145
6 files changed, 444 insertions, 1 deletions
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
new file mode 100644
index 0000000000..43e917c1a0
--- /dev/null
+++ b/synapse/handlers/user_directory.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, JoinRules, Membership
+from synapse.storage.roommember import ProfileInfo
+from synapse.util.metrics import Measure
+
+
+logger = logging.getLogger(__name__)
+
+
+class UserDirectoyHandler(object):
+    """Maintains the user directory from changes to current room state.
+
+    When no stream position has been stored yet, the directory is rebuilt
+    from every known room. After that, rows of the current state delta
+    stream are replayed to add or remove users as rooms become public or
+    private and as users join or leave them.
+
+    NOTE: the class name is misspelt ("Directoy"), but it is imported under
+    this name elsewhere, so it is kept as is.
+    """
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs: the homeserver object, which supplies the datastore, state
+                handler, hostname and clock.
+        """
+        self.store = hs.get_datastore()
+        self.state = hs.get_state_handler()
+        self.server_name = hs.hostname
+        self.clock = hs.get_clock()
+
+        # Users already inserted by the initial population, so that a user
+        # who is in several public rooms is only inserted once. Set to None
+        # once the initial population has completed.
+        self.initially_handled_users = set()
+
+        # Position in the current state delta stream up to which deltas have
+        # been handled; None until it has been loaded from the database.
+        self.pos = None
+
+        # True while deltas are being processed, to stop re-entrant runs.
+        self._is_processing = False
+
+    @defer.inlineCallbacks
+    def notify_new_event(self):
+        """Called when there may be new state deltas to process.
+
+        Does nothing if an earlier call is still processing.
+        """
+        if self._is_processing:
+            return
+
+        self._is_processing = True
+        try:
+            yield self._unsafe_process()
+        finally:
+            self._is_processing = False
+
+    @defer.inlineCallbacks
+    def _unsafe_process(self):
+        """Processes all outstanding state deltas.
+
+        Not safe to run concurrently; callers go via `notify_new_event`.
+        """
+        if self.pos is None:
+            self.pos = yield self.store.get_user_directory_stream_pos()
+
+        # No stored position means the directory has never been populated.
+        if self.pos is None:
+            yield self._do_initial_spam()
+            self.pos = yield self.store.get_user_directory_stream_pos()
+
+        while True:
+            with Measure(self.clock, "user_dir_delta"):
+                deltas = yield self.store.get_current_state_deltas(self.pos)
+                if not deltas:
+                    return
+
+                yield self._handle_deltas(deltas)
+
+                # Advance the in-memory position as well as the stored one:
+                # otherwise the next iteration fetches the same deltas again
+                # and this loop never terminates.
+                self.pos = deltas[-1]["stream_id"]
+                yield self.store.update_user_directory_stream_pos(self.pos)
+
+    @defer.inlineCallbacks
+    def _handle_room(self, room_id):
+        """Adds the members of a room to the directory if the room is public.
+
+        Only used by the initial population; users recorded in
+        `initially_handled_users` are skipped.
+
+        Args:
+            room_id (str)
+        """
+        # TODO: Check we're still joined to room
+
+        is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id)
+        if not is_public:
+            return
+
+        users_with_profile = yield self.state.get_current_user_in_room(room_id)
+        unhandled_users = set(users_with_profile) - self.initially_handled_users
+
+        yield self.store.add_profiles_to_user_dir(
+            room_id, {
+                user_id: users_with_profile[user_id] for user_id in unhandled_users
+            }
+        )
+
+        self.initially_handled_users |= unhandled_users
+
+    @defer.inlineCallbacks
+    def _do_initial_spam(self):
+        """Rebuilds the user directory from scratch from all known rooms."""
+        yield self.store.delete_all_from_user_dir()
+
+        # The table is now empty, so forget any users recorded by an earlier
+        # attempt that failed part way through; they must be inserted again.
+        self.initially_handled_users = set()
+
+        room_ids = yield self.store.get_all_rooms()
+
+        for room_id in room_ids:
+            yield self._handle_room(room_id)
+
+        self.initially_handled_users = None
+
+        # Start from -1 so that the `stream_id > pos` query used to fetch
+        # deltas matches every row (assuming stream ids are never negative).
+        yield self.store.update_user_directory_stream_pos(-1)
+
+    @defer.inlineCallbacks
+    def _handle_new_user(self, room_id, user_id, profile):
+        """Adds a user to the directory unless they already have an entry.
+
+        Args:
+            room_id (str): the room to record against the new entry
+            user_id (str)
+            profile (ProfileInfo): display name and avatar to store
+        """
+        row = yield self.store.get_user_in_directory(user_id)
+        if row:
+            return
+
+        yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile})
+
+    @defer.inlineCallbacks
+    def _handle_remove_user(self, room_id, user_id):
+        """Removes a user from the directory if `room_id` is the room recorded
+        against their entry and none of the rooms they are in is public.
+
+        NOTE(review): the entry keeps pointing at its original room even when
+        a different public room is what keeps the user listed, so leaving
+        that other room later will not remove them.
+
+        Args:
+            room_id (str): the room the user left, or that stopped being public
+            user_id (str)
+        """
+        row = yield self.store.get_user_in_directory(user_id)
+        if not row or row["room_id"] != room_id:
+            return
+
+        # TODO: Make this faster?
+        rooms = yield self.store.get_rooms_for_user(user_id)
+        for other_room_id in rooms:
+            is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
+                other_room_id
+            )
+
+            if is_public:
+                return
+
+        yield self.store.remove_from_user_dir(user_id)
+
+    @defer.inlineCallbacks
+    def _handle_deltas(self, deltas):
+        """Applies a batch of current state deltas to the directory.
+
+        Args:
+            deltas (list[dict]): rows with the keys "type", "state_key",
+                "room_id", "event_id" and "prev_event_id"
+        """
+        for delta in deltas:
+            typ = delta["type"]
+            state_key = delta["state_key"]
+            room_id = delta["room_id"]
+            event_id = delta["event_id"]
+            prev_event_id = delta["prev_event_id"]
+
+            if typ == EventTypes.RoomHistoryVisibility:
+                yield self._handle_room_publicity_change(
+                    room_id, prev_event_id, event_id,
+                    key_name="history_visibility",
+                    public_value="world_readable",
+                )
+            elif typ == EventTypes.JoinRules:
+                yield self._handle_room_publicity_change(
+                    room_id, prev_event_id, event_id,
+                    # The content key is singular, unlike the event type.
+                    key_name="join_rule",
+                    public_value=JoinRules.PUBLIC,
+                )
+            elif typ == EventTypes.Member:
+                change = yield self._get_key_change(
+                    prev_event_id, event_id,
+                    key_name="membership",
+                    public_value=Membership.JOIN,
+                )
+
+                if change is None:
+                    continue
+
+                if not change:
+                    yield self._handle_remove_user(room_id, state_key)
+                    continue
+
+                # The user joined. As in the initial population, only list
+                # them if the room they joined is public.
+                is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
+                    room_id
+                )
+                if not is_public:
+                    continue
+
+                event = yield self.store.get_event(event_id)
+                profile = ProfileInfo(
+                    avatar_url=event.content.get("avatar_url"),
+                    display_name=event.content.get("displayname"),
+                )
+
+                yield self._handle_new_user(room_id, state_key, profile)
+
+    @defer.inlineCallbacks
+    def _handle_room_publicity_change(self, room_id, prev_event_id, event_id,
+                                      key_name, public_value):
+        """Adds or removes every member of a room after a state event that
+        decides whether the room is public has changed.
+
+        Args:
+            room_id (str)
+            prev_event_id (str|None): the state event being replaced, if any
+            event_id (str|None): the new state event, if any
+            key_name (str): the key in the event content to compare
+            public_value (str): the value of that key that makes a room public
+        """
+        change = yield self._get_key_change(
+            prev_event_id, event_id,
+            key_name=key_name,
+            public_value=public_value,
+        )
+        if change is None:
+            return
+
+        users_with_profile = yield self.state.get_current_user_in_room(room_id)
+        for user_id, profile in users_with_profile.iteritems():
+            if change:
+                yield self._handle_new_user(room_id, user_id, profile)
+            else:
+                yield self._handle_remove_user(room_id, user_id)
+
+    @defer.inlineCallbacks
+    def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
+        """Works out whether a piece of state moved to or from `public_value`.
+
+        Args:
+            prev_event_id (str|None): the state event being replaced, if any
+            event_id (str|None): the new state event, if any
+            key_name (str): the key in the event content to compare
+            public_value (str): the value of that key to look for
+
+        Returns:
+            Deferred[bool|None]: True if the key now has `public_value` and did
+            not before, False if it had it before and no longer does, and None
+            if neither event could be fetched or nothing relevant changed.
+        """
+        prev_event = None
+        event = None
+        if prev_event_id:
+            prev_event = yield self.store.get_event(prev_event_id, allow_none=True)
+
+        if event_id:
+            event = yield self.store.get_event(event_id, allow_none=True)
+
+        if not event and not prev_event:
+            defer.returnValue(None)
+
+        prev_value = None
+        value = None
+
+        if prev_event:
+            prev_value = prev_event.content.get(key_name, None)
+
+        if event:
+            value = event.content.get(key_name, None)
+
+        logger.info("prev: %r, new: %r", prev_value, value)
+
+        if value == public_value and prev_value != public_value:
+            defer.returnValue(True)
+        elif value != public_value and prev_value == public_value:
+            defer.returnValue(False)
+        else:
+            defer.returnValue(None)
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 48566187ab..6b1709d700 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -167,6 +167,7 @@ class Notifier(object):
 
         self.clock = hs.get_clock()
         self.appservice_handler = hs.get_application_service_handler()
+        self.user_directory_handler = hs.get_user_directory_handler()
 
         if hs.should_send_federation():
             self.federation_sender = hs.get_federation_sender()
@@ -251,7 +252,10 @@ class Notifier(object):
         """Notify any user streams that are interested in this room event"""
         # poke any interested application service.
         preserve_fn(self.appservice_handler.notify_interested_services)(
-            room_stream_id)
+            room_stream_id
+        )
+
+        preserve_fn(self.user_directory_handler.notify_new_event)()
 
         if self.federation_sender:
             preserve_fn(self.federation_sender.notify_new_events)(
diff --git a/synapse/server.py b/synapse/server.py
index e400e278c6..a38e5179e0 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -49,6 +49,7 @@ from synapse.handlers.events import EventHandler, EventStreamHandler
 from synapse.handlers.initial_sync import InitialSyncHandler
 from synapse.handlers.receipts import ReceiptsHandler
 from synapse.handlers.read_marker import ReadMarkerHandler
+from synapse.handlers.user_directory import UserDirectoyHandler
 from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.notifier import Notifier
@@ -137,6 +138,7 @@ class HomeServer(object):
         'tcp_replication',
         'read_marker_handler',
         'action_generator',
+        'user_directory_handler',
     ]
 
     def __init__(self, hostname, **kwargs):
@@ -304,6 +306,9 @@ class HomeServer(object):
     def build_action_generator(self):
         return ActionGenerator(self)
 
+    def build_user_directory_handler(self):
+        return UserDirectoyHandler(self)
+
     def remove_pusher(self, app_id, push_key, user_id):
         return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)
 
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index d604e7668f..11655bf60f 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -49,6 +49,7 @@ from .tags import TagsStore
 from .account_data import AccountDataStore
 from .openid import OpenIdStore
 from .client_ips import ClientIpStore
+from .user_directory import UserDirectoryStore
 
 from .util.id_generators import IdGenerator, StreamIdGenerator, ChainedIdGenerator
 from .engines import PostgresEngine
@@ -86,6 +87,7 @@ class DataStore(RoomMemberStore, RoomStore,
                 ClientIpStore,
                 DeviceStore,
                 DeviceInboxStore,
+                UserDirectoryStore,
                 ):
 
     def __init__(self, db_conn, hs):
diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py
new file mode 100644
index 0000000000..38538960a4
--- /dev/null
+++ b/synapse/storage/schema/delta/42/user_dir.py
@@ -0,0 +1,88 @@
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.prepare_database import get_statements
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+
+logger = logging.getLogger(__name__)
+
+
+# Run on every engine: creates the single-row stream position table and
+# seeds it with a null position.
+BOTH_TABLES = """
+CREATE TABLE user_directory_stream_pos (
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    stream_id BIGINT,
+    CHECK (Lock='X')
+);
+
+INSERT INTO user_directory_stream_pos (stream_id) VALUES (null);
+"""
+
+
+# Postgres: an ordinary table with a tsvector column and a GIN index on it.
+POSTGRES_TABLE = """
+CREATE TABLE user_directory (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    display_name TEXT,
+    avatar_url TEXT,
+    vector tsvector
+);
+
+CREATE INDEX user_directory_fts_idx ON user_directory USING gin(vector);
+CREATE INDEX user_directory_user_idx ON user_directory(user_id);
+"""
+
+
+# SQLite: an FTS4 virtual table with the same columns, plus `value`.
+SQLITE_TABLE = """
+CREATE VIRTUAL TABLE user_directory
+    USING fts4 ( user_id, room_id, display_name, avatar_url, value );
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    """Creates the user directory tables for the database engine in use.
+
+    The stream position table is created first, on every engine, followed by
+    the engine-specific `user_directory` search table.
+
+    Args:
+        cur: database cursor to run the statements on
+        database_engine: the engine in use
+
+    Raises:
+        Exception: if the engine is neither Postgres nor SQLite; by then the
+            stream position table has already been created
+    """
+    def execute_script(script):
+        for sql in get_statements(script.splitlines()):
+            cur.execute(sql)
+
+    execute_script(BOTH_TABLES)
+
+    if isinstance(database_engine, PostgresEngine):
+        execute_script(POSTGRES_TABLE)
+    elif isinstance(database_engine, Sqlite3Engine):
+        execute_script(SQLITE_TABLE)
+    else:
+        raise Exception("Unrecognized database engine")
+
+
+def run_upgrade(*args, **kwargs):
+    """No-op: this delta has no upgrade step."""
+    pass
diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
new file mode 100644
index 0000000000..6c7c8c4bee
--- /dev/null
+++ b/synapse/storage/user_directory.py
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+
+from ._base import SQLBaseStore
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+from synapse.api.constants import EventTypes, JoinRules
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+
+
+class UserDirectoryStore(SQLBaseStore):
+    """Storage for the user directory table and its stream position."""
+
+    @cachedInlineCallbacks(cache_context=True)
+    def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context):
+        """Checks whether anyone can join the room or read its history.
+
+        Args:
+            room_id (str)
+            cache_context: supplied by the cache decorator; its `invalidate`
+                callback is registered on the current state lookup
+
+        Returns:
+            Deferred[bool]: True if the current join rule is public or the
+            current history visibility is world readable
+        """
+        current_state_ids = yield self.get_current_state_ids(
+            room_id, on_invalidate=cache_context.invalidate
+        )
+
+        join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
+        if join_rules_id:
+            join_rule_ev = yield self.get_event(join_rules_id, allow_none=True)
+            if join_rule_ev:
+                # The content key is "join_rule", singular, unlike the event type.
+                if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC:
+                    defer.returnValue(True)
+
+        hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, ""))
+        if hist_vis_id:
+            hist_vis_ev = yield self.get_event(hist_vis_id, allow_none=True)
+            if hist_vis_ev:
+                if hist_vis_ev.content.get("history_visibility") == "world_readable":
+                    defer.returnValue(True)
+
+        defer.returnValue(False)
+
+    def add_profiles_to_user_dir(self, room_id, users_with_profile):
+        """Inserts a directory entry for each of the given users.
+
+        Args:
+            room_id (str): the room to record against every entry
+            users_with_profile (dict): map from user ID to an object with
+                `display_name` and `avatar_url` attributes
+
+        Returns:
+            Deferred
+
+        Raises:
+            Exception: if the database engine is neither Postgres nor SQLite
+        """
+        if isinstance(self.database_engine, PostgresEngine):
+            sql = """
+                INSERT INTO user_directory
+                    (user_id, room_id, display_name, avatar_url, vector)
+                VALUES (?,?,?,?,to_tsvector('english', ?))
+            """
+        elif isinstance(self.database_engine, Sqlite3Engine):
+            sql = """
+                INSERT INTO user_directory
+                    (user_id, room_id, display_name, avatar_url, value)
+                VALUES (?,?,?,?,?)
+            """
+        else:
+            # This should be unreachable.
+            raise Exception("Unrecognized database engine")
+
+        def _add_profiles_to_user_dir_txn(txn):
+            # The last column is the text to search on: the user ID, plus the
+            # display name when there is one.
+            txn.executemany(sql, (
+                (
+                    user_id, room_id, p.display_name, p.avatar_url,
+                    "%s %s" % (user_id, p.display_name,) if p.display_name else user_id
+                )
+                for user_id, p in users_with_profile.iteritems()
+            ))
+            for user_id in users_with_profile:
+                txn.call_after(
+                    self.get_user_in_directory.invalidate, (user_id,)
+                )
+
+        return self.runInteraction(
+            "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn
+        )
+
+    @defer.inlineCallbacks
+    def remove_from_user_dir(self, user_id):
+        """Deletes a user's directory entries and drops their cached lookup.
+
+        Args:
+            user_id (str)
+        """
+        yield self._simple_delete(
+            table="user_directory",
+            keyvalues={"user_id": user_id},
+            desc="remove_from_user_dir",
+        )
+        self.get_user_in_directory.invalidate((user_id,))
+
+    def get_all_rooms(self):
+        """Gets the IDs of all rooms that have any current state.
+
+        Returns:
+            Deferred[list[str]]
+        """
+        return self._simple_select_onecol(
+            table="current_state_events",
+            keyvalues={},
+            # NOTE(review): presumably retcol is interpolated into the query
+            # verbatim, making this a SELECT DISTINCT; confirm.
+            retcol="DISTINCT room_id",
+            desc="get_all_rooms",
+        )
+
+    def delete_all_from_user_dir(self):
+        """Empties the user directory table and the lookup cache."""
+        def _delete_all_from_user_dir_txn(txn):
+            txn.execute("DELETE FROM user_directory")
+            txn.call_after(self.get_user_in_directory.invalidate_all)
+        return self.runInteraction(
+            "delete_all_from_user_dir", _delete_all_from_user_dir_txn
+        )
+
+    @cached()
+    def get_user_in_directory(self, user_id):
+        """Looks up a user's directory entry.
+
+        Args:
+            user_id (str)
+
+        Returns:
+            Deferred[dict|None]: the entry's "room_id", "display_name" and
+            "avatar_url", or None if the user has no entry
+        """
+        return self._simple_select_one(
+            table="user_directory",
+            keyvalues={"user_id": user_id},
+            retcols=("room_id", "display_name", "avatar_url",),
+            allow_none=True,
+            desc="get_user_in_directory",
+        )
+
+    def get_user_directory_stream_pos(self):
+        """Gets the stored stream position, which is null until one is set.
+
+        Returns:
+            Deferred[int|None]
+        """
+        return self._simple_select_one_onecol(
+            table="user_directory_stream_pos",
+            keyvalues={},
+            retcol="stream_id",
+            desc="get_user_directory_stream_pos",
+        )
+
+    def update_user_directory_stream_pos(self, stream_id):
+        """Stores a new stream position.
+
+        Args:
+            stream_id (int)
+        """
+        return self._simple_update_one(
+            table="user_directory_stream_pos",
+            keyvalues={},
+            updatevalues={"stream_id": stream_id},
+            desc="update_user_directory_stream_pos",
+        )
+
+    def get_current_state_deltas(self, prev_stream_id):
+        """Gets every current state delta after the given stream position.
+
+        Args:
+            prev_stream_id (int): only rows with a greater stream ID are returned
+
+        Returns:
+            Deferred[list[dict]]: rows in ascending stream order, each with the
+            keys "stream_id", "room_id", "type", "state_key", "event_id" and
+            "prev_event_id"
+        """
+        # TODO: Add stream change cache
+        # TODO: Add limit
+        sql = """
+            SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
+            FROM current_state_delta_stream
+            WHERE stream_id > ?
+            ORDER BY stream_id ASC
+        """
+
+        return self._execute(
+            "get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id
+        )