summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erikj@jki.re>2017-06-13 10:58:43 +0100
committerGitHub <noreply@github.com>2017-06-13 10:58:43 +0100
commit90ae0cffecff027f17573a2576422ce127998d32 (patch)
treea337ba49c2abc09f72635bbf6c30f1f0bc363d2b
parentMerge pull request #2276 from matrix-org/erikj/fix_user_di (diff)
parentTweak the ranking of PG user dir search (diff)
downloadsynapse-90ae0cffecff027f17573a2576422ce127998d32.tar.xz
Merge pull request #2275 from matrix-org/erikj/tweark_user_directory_search
Tweak the ranking of PG user dir search
-rw-r--r--synapse/storage/user_directory.py53
1 files changed, 40 insertions, 13 deletions
diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
index 6a4bf63f0d..0b874e0227 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/user_directory.py
@@ -391,11 +391,14 @@ class UserDirectoryStore(SQLBaseStore):
                     ]
                 }
         """
-
-        search_query = _parse_query(self.database_engine, search_term)
-
         if isinstance(self.database_engine, PostgresEngine):
+            full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
+
             # We order by rank and then if they have profile info
+            # The ranking algorithm is hand tweaked for "best" results. Broadly
+            # the idea is we give a higher weight to exact matches.
+            # The array of numbers are the weights for the various part of the
+            # search: (domain, _, display name, localpart)
             sql = """
                 SELECT user_id, display_name, avatar_url
                 FROM user_directory_search
@@ -403,13 +406,27 @@ class UserDirectoryStore(SQLBaseStore):
                 INNER JOIN users_in_pubic_room USING (user_id)
                 WHERE vector @@ to_tsquery('english', ?)
                 ORDER BY
-                    ts_rank_cd(vector, to_tsquery('english', ?), 1) DESC,
+                    2 * ts_rank_cd(
+                        '{0.1, 0.1, 0.9, 1.0}',
+                        vector,
+                        to_tsquery('english', ?),
+                        8
+                    )
+                    + ts_rank_cd(
+                        '{0.1, 0.1, 0.9, 1.0}',
+                        vector,
+                        to_tsquery('english', ?),
+                        8
+                    )
+                    DESC,
                     display_name IS NULL,
                     avatar_url IS NULL
                 LIMIT ?
             """
-            args = (search_query, search_query, limit + 1,)
+            args = (full_query, exact_query, prefix_query, limit + 1,)
         elif isinstance(self.database_engine, Sqlite3Engine):
+            search_query = _parse_query_sqlite(search_term)
+
             sql = """
                 SELECT user_id, display_name, avatar_url
                 FROM user_directory_search
@@ -439,7 +456,7 @@ class UserDirectoryStore(SQLBaseStore):
         })
 
 
-def _parse_query(database_engine, search_term):
+def _parse_query_sqlite(search_term):
     """Takes a plain unicode string from the user and converts it into a form
     that can be passed to database.
     We use this so that we can add prefix matching, which isn't something
@@ -451,11 +468,21 @@ def _parse_query(database_engine, search_term):
 
     # Pull out the individual words, discarding any non-word characters.
     results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+    return " & ".join("(%s* | %s)" % (result, result,) for result in results)
+
+
+def _parse_query_postgres(search_term):
+    """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to database.
+    We use this so that we can add prefix matching, which isn't something
+    that is supported by default.
+    """
+
+    # Pull out the individual words, discarding any non-word characters.
+    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+
+    both = " & ".join("(%s:* | %s)" % (result, result,) for result in results)
+    exact = " & ".join("%s" % (result,) for result in results)
+    prefix = " & ".join("%s:*" % (result,) for result in results)
 
-    if isinstance(database_engine, PostgresEngine):
-        return " & ".join("(%s:* | %s)" % (result, result,) for result in results)
-    elif isinstance(database_engine, Sqlite3Engine):
-        return " & ".join("(%s* | %s)" % (result, result,) for result in results)
-    else:
-        # This should be unreachable.
-        raise Exception("Unrecognized database engine")
+    return both, exact, prefix