# -*- coding: utf-8 -*- # Copyright 2017 Vector Creations Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from twisted.internet import defer from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import get_domain_from_id, get_localpart_from_id import re import logging logger = logging.getLogger(__name__) class UserDirectoryStore(SQLBaseStore): @cachedInlineCallbacks(cache_context=True) def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): """Check if the room is either world_readable or publically joinable """ current_state_ids = yield self.get_current_state_ids( room_id, on_invalidate=cache_context.invalidate ) join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) if join_rules_id: join_rule_ev = yield self.get_event(join_rules_id, allow_none=True) if join_rule_ev: if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC: defer.returnValue(True) hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, "")) if hist_vis_id: hist_vis_ev = yield self.get_event(hist_vis_id, allow_none=True) if hist_vis_ev: if hist_vis_ev.content.get("history_visibility") == "world_readable": defer.returnValue(True) defer.returnValue(False) @defer.inlineCallbacks def add_users_to_public_room(self, room_id, user_ids): """Add user to the list of users in public rooms Args: room_id (str): A room_id that all users are in that is world_readable or publically joinable user_ids (list(str)): Users to add """ yield self._simple_insert_many( table="users_in_public_rooms", values=[ { "user_id": user_id, "room_id": room_id, } for user_id in user_ids ], desc="add_users_to_public_room" ) for user_id in user_ids: self.get_user_in_public_room.invalidate((user_id,)) def add_profiles_to_user_dir(self, room_id, users_with_profile): """Add profiles to the user directory Args: room_id (str): A room_id that all users are joined to users_with_profile (dict): Users to add to directory in the form of mapping of user_id -> ProfileInfo """ if isinstance(self.database_engine, PostgresEngine): # We weight the loclpart most highly, then display name and finally # server name sql = """ INSERT INTO user_directory_search(user_id, vector) VALUES (?, setweight(to_tsvector('english', ?), 'A') || setweight(to_tsvector('english', ?), 'D') || setweight(to_tsvector('english', COALESCE(?, '')), 'B') ) """ args = ( ( user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), profile.display_name, ) for user_id, profile in users_with_profile.iteritems() ) elif isinstance(self.database_engine, Sqlite3Engine): sql = """ INSERT INTO user_directory_search(user_id, value) VALUES (?,?) """ args = ( ( user_id, "%s %s" % (user_id, p.display_name,) if p.display_name else user_id ) for user_id, p in users_with_profile.iteritems() ) else: # This should be unreachable. raise Exception("Unrecognized database engine") def _add_profiles_to_user_dir_txn(txn): txn.executemany(sql, args) self._simple_insert_many_txn( txn, table="user_directory", values=[ { "user_id": user_id, "room_id": room_id, "display_name": profile.display_name, "avatar_url": profile.avatar_url, } for user_id, profile in users_with_profile.iteritems() ] ) for user_id in users_with_profile: txn.call_after( self.get_user_in_directory.invalidate, (user_id,) ) return self.runInteraction( "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn ) @defer.inlineCallbacks def update_user_in_user_dir(self, user_id, room_id): yield self._simple_update_one( table="user_directory", keyvalues={"user_id": user_id}, updatevalues={"room_id": room_id}, desc="update_user_in_user_dir", ) self.get_user_in_directory.invalidate((user_id,)) def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) if isinstance(self.database_engine, PostgresEngine): # We weight the localpart most highly, then display name and finally # server name if new_entry: sql = """ INSERT INTO user_directory_search(user_id, vector) VALUES (?, setweight(to_tsvector('english', ?), 'A') || setweight(to_tsvector('english', ?), 'D') || setweight(to_tsvector('english', COALESCE(?, '')), 'B') ) """ txn.execute( sql, ( user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), display_name, ) ) else: sql = """ UPDATE user_directory_search SET vector = setweight(to_tsvector('english', ?), 'A') || setweight(to_tsvector('english', ?), 'D') || setweight(to_tsvector('english', COALESCE(?, '')), 'B') WHERE user_id = ? """ txn.execute( sql, ( get_localpart_from_id(user_id), get_domain_from_id(user_id), display_name, user_id, ) ) elif isinstance(self.database_engine, Sqlite3Engine): value = "%s %s" % (user_id, display_name,) if display_name else user_id self._simple_upsert_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id}, values={"value": value}, lock=False, # We're only inserter ) else: # This should be unreachable. raise Exception("Unrecognized database engine") txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) return self.runInteraction( "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) @defer.inlineCallbacks def update_user_in_public_user_list(self, user_id, room_id): yield self._simple_update_one( table="users_in_public_rooms", keyvalues={"user_id": user_id}, updatevalues={"room_id": room_id}, desc="update_user_in_public_user_list", ) self.get_user_in_public_room.invalidate((user_id,)) def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, ) self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id}, ) self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id}, ) txn.call_after( self.get_user_in_directory.invalidate, (user_id,) ) txn.call_after( self.get_user_in_public_room.invalidate, (user_id,) ) return self.runInteraction( "remove_from_user_dir", _remove_from_user_dir_txn, ) @defer.inlineCallbacks def remove_from_user_in_public_room(self, user_id): yield self._simple_delete( table="users_in_public_rooms", keyvalues={"user_id": user_id}, desc="remove_from_user_in_public_room", ) self.get_user_in_public_room.invalidate((user_id,)) def get_users_in_public_due_to_room(self, room_id): """Get all user_ids that are in the room directory becuase they're in the given room_id """ return self._simple_select_onecol( table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_public_due_to_room", ) @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory becuase they're in the given room_id """ user_ids_dir = yield self._simple_select_onecol( table="user_directory", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) user_ids_pub = yield self._simple_select_onecol( table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) user_ids_share = yield self._simple_select_onecol( table="users_who_share_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) user_ids = set(user_ids_dir) user_ids.update(user_ids_pub) user_ids.update(user_ids_share) defer.returnValue(user_ids) @defer.inlineCallbacks def get_all_rooms(self): """Get all room_ids we've ever known about, in ascending order of "size" """ sql = """ SELECT room_id FROM current_state_events GROUP BY room_id ORDER BY count(*) ASC """ rows = yield self._execute("get_all_rooms", None, sql) defer.returnValue([room_id for room_id, in rows]) @defer.inlineCallbacks def get_all_local_users(self): """Get all local users """ sql = """ SELECT name FROM users """ rows = yield self._execute("get_all_local_users", None, sql) defer.returnValue([name for name, in rows]) def add_users_who_share_room(self, room_id, share_private, user_id_tuples): """Insert entries into the users_who_share_rooms table. The first user should be a local user. Args: room_id (str) share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): self._simple_insert_many_txn( txn, table="users_who_share_rooms", values=[ { "user_id": user_id, "other_user_id": other_user_id, "room_id": room_id, "share_private": share_private, } for user_id, other_user_id in user_id_tuples ], ) for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,), ) txn.call_after( self.get_if_users_share_a_room.invalidate, (user_id, other_user_id), ) return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) def update_users_who_share_room(self, room_id, share_private, user_id_sets): """Updates entries in the users_who_share_rooms table. The first user should be a local user. Args: room_id (str) share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _update_users_who_share_room_txn(txn): sql = """ UPDATE users_who_share_rooms SET room_id = ?, share_private = ? WHERE user_id = ? AND other_user_id = ? """ txn.executemany( sql, ( (room_id, share_private, uid, oid) for uid, oid in user_id_sets ) ) for user_id, other_user_id in user_id_sets: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,), ) txn.call_after( self.get_if_users_share_a_room.invalidate, (user_id, other_user_id), ) return self.runInteraction( "update_users_who_share_room", _update_users_who_share_room_txn ) def remove_user_who_share_room(self, user_id, other_user_id): """Deletes entries in the users_who_share_rooms table. The first user should be a local user. Args: room_id (str) share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, table="users_who_share_rooms", keyvalues={ "user_id": user_id, "other_user_id": other_user_id, }, ) txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,), ) txn.call_after( self.get_if_users_share_a_room.invalidate, (user_id, other_user_id), ) return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) @cached(max_entries=500000) def get_if_users_share_a_room(self, user_id, other_user_id): """Gets if users share a room. Args: user_id (str): Must be a local user_id other_user_id (str) Returns: bool|None: None if they don't share a room, otherwise whether they share a private room or not. """ return self._simple_select_one_onecol( table="users_who_share_rooms", keyvalues={ "user_id": user_id, "other_user_id": other_user_id, }, retcol="share_private", allow_none=True, desc="get_if_users_share_a_room", ) @cachedInlineCallbacks(max_entries=500000, iterable=True) def get_users_who_share_room_from_dir(self, user_id): """Returns the set of users who share a room with `user_id` Args: user_id(str): Must be a local user Returns: dict: user_id -> share_private mapping """ rows = yield self._simple_select_list( table="users_who_share_rooms", keyvalues={ "user_id": user_id, }, retcols=("other_user_id", "share_private",), desc="get_users_who_share_room_with_user", ) defer.returnValue({ row["other_user_id"]: row["share_private"] for row in rows }) def get_users_in_share_dir_with_room_id(self, user_id, room_id): """Get all user tuples that are in the users_who_share_rooms due to the given room_id. Returns: [(user_id, other_user_id)]: where one of the two will match the given user_id. """ sql = """ SELECT user_id, other_user_id FROM users_who_share_rooms WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) """ return self._execute( "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id ) @defer.inlineCallbacks def get_rooms_in_common_for_users(self, user_id, other_user_id): """Given two user_ids find out the list of rooms they share. """ sql = """ SELECT room_id FROM ( SELECT c.room_id FROM current_state_events AS c INNER JOIN room_memberships USING (event_id) WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ? ) AS f1 INNER JOIN ( SELECT c.room_id FROM current_state_events AS c INNER JOIN room_memberships USING (event_id) WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ? ) f2 USING (room_id) """ rows = yield self._execute( "get_rooms_in_common_for_users", None, sql, user_id, other_user_id ) defer.returnValue([room_id for room_id, in rows]) def delete_all_from_user_dir(self): """Delete the entire user directory """ def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") txn.execute("DELETE FROM users_who_share_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) txn.call_after(self.get_user_in_public_room.invalidate_all) txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn ) @cached() def get_user_in_directory(self, user_id): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, retcols=("room_id", "display_name", "avatar_url",), allow_none=True, desc="get_user_in_directory", ) @cached() def get_user_in_public_room(self, user_id): return self._simple_select_one( table="users_in_public_rooms", keyvalues={"user_id": user_id}, retcols=("room_id",), allow_none=True, desc="get_user_in_public_room", ) def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", keyvalues={}, retcol="stream_id", desc="get_user_directory_stream_pos", ) def update_user_directory_stream_pos(self, stream_id): return self._simple_update_one( table="user_directory_stream_pos", keyvalues={}, updatevalues={"stream_id": stream_id}, desc="update_user_directory_stream_pos", ) def get_current_state_deltas(self, prev_stream_id): prev_stream_id = int(prev_stream_id) if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): return [] def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than # N results. # We arbitarily limit to 100 stream_id entries to ensure we don't # select toooo many. sql = """ SELECT stream_id, count(*) FROM current_state_delta_stream WHERE stream_id > ? GROUP BY stream_id ORDER BY stream_id ASC LIMIT 100 """ txn.execute(sql, (prev_stream_id,)) total = 0 max_stream_id = prev_stream_id for max_stream_id, count in txn: total += count if total > 100: # We arbitarily limit to 100 entries to ensure we don't # select toooo many. break # Now actually get the deltas sql = """ SELECT stream_id, room_id, type, state_key, event_id, prev_event_id FROM current_state_delta_stream WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ txn.execute(sql, (prev_stream_id, max_stream_id,)) return self.cursor_to_dict(txn) return self.runInteraction( "get_current_state_deltas", get_current_state_deltas_txn ) def get_max_stream_id_in_current_state_deltas(self): return self._simple_select_one_onecol( table="current_state_delta_stream", keyvalues={}, retcol="COALESCE(MAX(stream_id), -1)", desc="get_max_stream_id_in_current_state_deltas", ) @defer.inlineCallbacks def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory Returns: dict of the form:: { "limited": <bool>, # whether there were more results or not "results": [ # Ordered by best match first { "user_id": <user_id>, "display_name": <display_name>, "avatar_url": <avatar_url> } ] } """ if self.hs.config.user_directory_search_all_users: # make s.user_id null to keep the ordering algorithm happy join_clause = """ CROSS JOIN (SELECT NULL as user_id) AS s """ join_args = () where_clause = "1=1" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) LEFT JOIN ( SELECT other_user_id AS user_id FROM users_who_share_rooms WHERE user_id = ? AND share_private ) AS s USING (user_id) """ join_args = (user_id,) where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) # We order by rank and then if they have profile info # The ranking algorithm is hand tweaked for "best" results. Broadly # the idea is we give a higher weight to exact matches. # The array of numbers are the weights for the various part of the # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) %s WHERE %s AND vector @@ to_tsquery('english', ?) ORDER BY (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) * ( 3 * ts_rank_cd( '{0.1, 0.1, 0.9, 1.0}', vector, to_tsquery('english', ?), 8 ) + ts_rank_cd( '{0.1, 0.1, 0.9, 1.0}', vector, to_tsquery('english', ?), 8 ) ) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) args = join_args + (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) %s WHERE %s AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) args = join_args + (search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") results = yield self._execute( "search_user_dir", self.cursor_to_dict, sql, *args ) limited = len(results) > limit defer.returnValue({ "limited": limited, "results": results, }) def _parse_query_sqlite(search_term): """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something that is supported by default. We specifically add both a prefix and non prefix matching term so that exact matches get ranked higher. """ # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) return " & ".join("(%s* OR %s)" % (result, result,) for result in results) def _parse_query_postgres(search_term): """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something that is supported by default. """ # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) both = " & ".join("(%s:* | %s)" % (result, result,) for result in results) exact = " & ".join("%s" % (result,) for result in results) prefix = " & ".join("%s:*" % (result,) for result in results) return both, exact, prefix