diff options
Diffstat (limited to '')
-rw-r--r-- | tests/storage/test_room_search.py | 192 |
1 files changed, 189 insertions, 3 deletions
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index e747c6b50e..14d872514d 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -12,11 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Tuple +from unittest.case import SkipTest + +from twisted.test.proto_helpers import MemoryReactor + import synapse.rest.admin from synapse.api.constants import EventTypes from synapse.api.errors import StoreError from synapse.rest.client import login, room +from synapse.server import HomeServer +from synapse.storage.databases.main import DataStore +from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query from synapse.storage.engines import PostgresEngine +from synapse.storage.engines.sqlite import Sqlite3Engine +from synapse.util import Clock from tests.unittest import HomeserverTestCase, skip_unless from tests.utils import USE_POSTGRES_FOR_TESTS @@ -29,7 +39,7 @@ class EventSearchInsertionTest(HomeserverTestCase): room.register_servlets, ] - def test_null_byte(self): + def test_null_byte(self) -> None: """ Postgres/SQLite don't like null bytes going into the search tables. Internally we replace those with a space. @@ -76,7 +86,7 @@ class EventSearchInsertionTest(HomeserverTestCase): if isinstance(store.database_engine, PostgresEngine): self.assertIn("alice", result.get("highlights")) - def test_non_string(self): + def test_non_string(self) -> None: """Test that non-string `value`s are not inserted into `event_search`. This is particularly important when using sqlite, since a sqlite column can hold @@ -147,7 +157,7 @@ class EventSearchInsertionTest(HomeserverTestCase): self.assertEqual(f.value.code, 404) @skip_unless(not USE_POSTGRES_FOR_TESTS, "requires sqlite") - def test_sqlite_non_string_deletion_background_update(self): + def test_sqlite_non_string_deletion_background_update(self) -> None: """Test the background update to delete bad rows from `event_search`.""" store = self.hs.get_datastores().main @@ -187,3 +197,179 @@ class EventSearchInsertionTest(HomeserverTestCase): ), ) self.assertCountEqual(values, ["hi", "2"]) + + +class MessageSearchTest(HomeserverTestCase): + """ + Check message search. + + A powerful way to check the behaviour is to run the following in Postgres >= 11: + + # SELECT websearch_to_tsquery('english', <your string>); + + The result can be compared to the tokenized version for SQLite and Postgres < 11. + + """ + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + + PHRASE = "the quick brown fox jumps over the lazy dog" + + # Each entry is a search query, followed by a boolean of whether it is in the phrase. + COMMON_CASES = [ + ("nope", False), + ("brown", True), + ("quick brown", True), + ("brown quick", True), + ("quick \t brown", True), + ("jump", True), + ("brown nope", False), + ('"brown quick"', False), + ('"jumps over"', True), + ('"quick fox"', False), + ("nope OR doublenope", False), + ("furphy OR fox", True), + ("fox -nope", True), + ("fox -brown", False), + ('"fox" quick', True), + ('"quick brown', True), + ('" quick "', True), + ('" nope"', False), + ] + # TODO Test non-ASCII cases. + + # Case that fail on SQLite. + POSTGRES_CASES = [ + # SQLite treats NOT as a binary operator. + ("- fox", False), + ("- nope", True), + ('"-fox quick', False), + # PostgreSQL skips stop words. + ('"the quick brown"', True), + ('"over lazy"', True), + ] + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + # Register a user and create a room, create some messages + self.register_user("alice", "password") + self.access_token = self.login("alice", "password") + self.room_id = self.helper.create_room_as("alice", tok=self.access_token) + + # Send the phrase as a message and check it was created + response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token) + self.assertIn("event_id", response) + + # The behaviour of a missing trailing double quote changed in PostgreSQL 14 + # from ignoring the initial double quote to treating it as a phrase. + main_store = homeserver.get_datastores().main + found = False + if isinstance(main_store.database_engine, PostgresEngine): + assert main_store.database_engine._version is not None + found = main_store.database_engine._version < 140000 + self.COMMON_CASES.append(('"fox quick', found)) + + def test_tokenize_query(self) -> None: + """Test the custom logic to tokenize a user's query.""" + cases = ( + ("brown", ["brown"]), + ("quick brown", ["quick", SearchToken.And, "brown"]), + ("quick \t brown", ["quick", SearchToken.And, "brown"]), + ('"brown quick"', [Phrase(["brown", "quick"])]), + ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]), + ("fox -brown", ["fox", SearchToken.Not, "brown"]), + ("- fox", [SearchToken.Not, "fox"]), + ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]), + # No trailing double quote. + ('"fox quick', [Phrase(["fox", "quick"])]), + ('"-fox quick', [Phrase(["-fox", "quick"])]), + ('" quick "', [Phrase(["quick"])]), + ( + 'q"uick brow"n', + [ + "q", + SearchToken.And, + Phrase(["uick", "brow"]), + SearchToken.And, + "n", + ], + ), + ( + '-"quick brown"', + [SearchToken.Not, Phrase(["quick", "brown"])], + ), + ) + + for query, expected in cases: + tokenized = _tokenize_query(query) + self.assertEqual( + tokenized, expected, f"{tokenized} != {expected} for {query}" + ) + + def _check_test_cases( + self, store: DataStore, cases: List[Tuple[str, bool]] + ) -> None: + # Run all the test cases versus search_msgs + for query, expect_to_contain in cases: + result = self.get_success( + store.search_msgs([self.room_id], query, ["content.body"]) + ) + self.assertEquals( + result["count"], + 1 if expect_to_contain else 0, + f"expected '{query}' to match '{self.PHRASE}'" + if expect_to_contain + else f"'{query}' unexpectedly matched '{self.PHRASE}'", + ) + self.assertEquals( + len(result["results"]), + 1 if expect_to_contain else 0, + "results array length should match count", + ) + + # Run them again versus search_rooms + for query, expect_to_contain in cases: + result = self.get_success( + store.search_rooms([self.room_id], query, ["content.body"], 10) + ) + self.assertEquals( + result["count"], + 1 if expect_to_contain else 0, + f"expected '{query}' to match '{self.PHRASE}'" + if expect_to_contain + else f"'{query}' unexpectedly matched '{self.PHRASE}'", + ) + self.assertEquals( + len(result["results"]), + 1 if expect_to_contain else 0, + "results array length should match count", + ) + + def test_postgres_web_search_for_phrase(self) -> None: + """ + Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery. + This test is skipped unless the postgres instance supports websearch_to_tsquery. + + See https://www.postgresql.org/docs/current/textsearch-controls.html + """ + + store = self.hs.get_datastores().main + if not isinstance(store.database_engine, PostgresEngine): + raise SkipTest("Test only applies when postgres is used as the database") + + self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES) + + def test_sqlite_search(self) -> None: + """ + Test sqlite searching for phrases. + """ + store = self.hs.get_datastores().main + if not isinstance(store.database_engine, Sqlite3Engine): + raise SkipTest("Test only applies when sqlite is used as the database") + + self._check_test_cases(store, self.COMMON_CASES) |