diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index e747c6b50e..9ddc19900a 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -12,11 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from typing import List, Tuple, Union
+from unittest.case import SkipTest
+from unittest.mock import PropertyMock, patch
+
+from twisted.test.proto_helpers import MemoryReactor
+
import synapse.rest.admin
from synapse.api.constants import EventTypes
from synapse.api.errors import StoreError
from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.storage.databases.main import DataStore
+from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query
from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines.sqlite import Sqlite3Engine
+from synapse.util import Clock
from tests.unittest import HomeserverTestCase, skip_unless
from tests.utils import USE_POSTGRES_FOR_TESTS
@@ -187,3 +198,205 @@ class EventSearchInsertionTest(HomeserverTestCase):
),
)
self.assertCountEqual(values, ["hi", "2"])
+
+
+class MessageSearchTest(HomeserverTestCase):
+    """
+    Check message search.
+
+    A powerful way to check the behaviour is to run the following in Postgres >= 11:
+
+        # SELECT websearch_to_tsquery('english', <your string>);
+
+    The result can be compared to the tokenized version for SQLite and Postgres < 11.
+    """
+
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    # The single message that `prepare` sends to the room; every query below is
+    # matched against it.
+    PHRASE = "the quick brown fox jumps over the lazy dog"
+
+    # Each entry is a search query, followed by either a boolean of whether it is
+    # in the phrase OR a tuple of booleans: whether it matches using websearch
+    # and using plain search.
+    COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        ("nope", False),
+        ("brown", True),
+        ("quick brown", True),
+        ("brown quick", True),
+        ("quick \t brown", True),
+        ("jump", True),
+        ("brown nope", False),
+        ('"brown quick"', (False, True)),
+        ('"jumps over"', True),
+        ('"quick fox"', (False, True)),
+        ("nope OR doublenope", False),
+        ("furphy OR fox", (True, False)),
+        ("fox -nope", (True, False)),
+        ("fox -brown", (False, True)),
+        ('"fox" quick', True),
+        ('"fox quick', True),
+        ('"quick brown', True),
+        ('" quick "', True),
+        ('" nope"', False),
+    ]
+    # TODO Test non-ASCII cases.
+
+    # Cases that fail on SQLite.
+    POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        # SQLite treats NOT as a binary operator.
+        ("- fox", (False, True)),
+        ("- nope", (True, False)),
+        ('"-fox quick', (False, True)),
+        # PostgreSQL skips stop words.
+        ('"the quick brown"', True),
+        ('"over lazy"', True),
+    ]
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        """Register a user, create a room and send `PHRASE` into it."""
+        # Register a user and create a room, create some messages
+        self.register_user("alice", "password")
+        self.access_token = self.login("alice", "password")
+        self.room_id = self.helper.create_room_as("alice", tok=self.access_token)
+
+        # Send the phrase as a message and check it was created
+        response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
+        self.assertIn("event_id", response)
+
+    def test_tokenize_query(self) -> None:
+        """Test the custom logic to tokenize a user's query."""
+        cases = (
+            ("brown", ["brown"]),
+            ("quick brown", ["quick", SearchToken.And, "brown"]),
+            ("quick \t brown", ["quick", SearchToken.And, "brown"]),
+            ('"brown quick"', [Phrase(["brown", "quick"])]),
+            ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]),
+            ("fox -brown", ["fox", SearchToken.Not, "brown"]),
+            ("- fox", [SearchToken.Not, "fox"]),
+            ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
+            # No trailing double quote.
+            ('"fox quick', ["fox", SearchToken.And, "quick"]),
+            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            ('" quick "', [Phrase(["quick"])]),
+            (
+                'q"uick brow"n',
+                [
+                    "q",
+                    SearchToken.And,
+                    Phrase(["uick", "brow"]),
+                    SearchToken.And,
+                    "n",
+                ],
+            ),
+            (
+                '-"quick brown"',
+                [SearchToken.Not, Phrase(["quick", "brown"])],
+            ),
+        )
+
+        for query, expected in cases:
+            tokenized = _tokenize_query(query)
+            self.assertEqual(
+                tokenized, expected, f"{tokenized} != {expected} for {query}"
+            )
+
+    def _check_test_cases(
+        self,
+        store: DataStore,
+        cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
+        index: int = 0,
+    ) -> None:
+        """Run every case against both `search_msgs` and `search_rooms`.
+
+        Args:
+            store: The datastore whose search methods are exercised.
+            cases: Pairs of (query, expectation). The expectation is either a
+                single bool, or a tuple of bools from which `index` picks one.
+            index: Which tuple slot to use: 0 is the websearch expectation,
+                1 is the plain search expectation.
+        """
+        # Both search entry points must agree on every case. All cases are run
+        # against search_msgs first and then against search_rooms.
+        searches = (
+            lambda query: store.search_msgs([self.room_id], query, ["content.body"]),
+            lambda query: store.search_rooms(
+                [self.room_id], query, ["content.body"], 10
+            ),
+        )
+
+        for search in searches:
+            for query, expect_to_contain in cases:
+                if isinstance(expect_to_contain, tuple):
+                    expect_to_contain = expect_to_contain[index]
+                # Only one message exists in the room, so a match means exactly
+                # one result.
+                expected_count = 1 if expect_to_contain else 0
+
+                result = self.get_success(search(query))
+                self.assertEqual(
+                    result["count"],
+                    expected_count,
+                    f"expected '{query}' to match '{self.PHRASE}'"
+                    if expect_to_contain
+                    else f"'{query}' unexpectedly matched '{self.PHRASE}'",
+                )
+                self.assertEqual(
+                    len(result["results"]),
+                    expected_count,
+                    "results array length should match count",
+                )
+
+    def test_postgres_web_search_for_phrase(self) -> None:
+        """
+        Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
+        This test is skipped unless the postgres instance supports websearch_to_tsquery.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        if store.database_engine.tsquery_func != "websearch_to_tsquery":
+            raise SkipTest(
+                "Test only applies when postgres supporting websearch_to_tsquery is used as the database"
+            )
+
+        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)
+
+    def test_postgres_non_web_search_for_phrase(self) -> None:
+        """
+        Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
+        supported by the current postgres version.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        # Patch tsquery_func to always return "plainto_tsquery" to ensure we're
+        # testing the plainto_tsquery path.
+        with patch(
+            "synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
+            new_callable=PropertyMock,
+        ) as tsquery_func:
+            tsquery_func.return_value = "plainto_tsquery"
+            self._check_test_cases(
+                store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
+            )
+
+    def test_sqlite_search(self) -> None:
+        """
+        Test sqlite searching for phrases.
+        """
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, Sqlite3Engine):
+            raise SkipTest("Test only applies when sqlite is used as the database")
+
+        # The SQLite cases use the first (websearch) slot of tuple expectations.
+        self._check_test_cases(store, self.COMMON_CASES, index=0)
|