summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2022-10-27 09:58:12 -0400
committerGitHub <noreply@github.com>2022-10-27 13:58:12 +0000
commit67583281e3f8ea923eedbc56a4c85c7ba75d1582 (patch)
tree88d11b6c4b3dab2a57c804013ec64d33f6a3ae76
parentAdd workers settings to configuration manual (#14086) (diff)
downloadsynapse-67583281e3f8ea923eedbc56a4c85c7ba75d1582.tar.xz
Fix tests for change in PostgreSQL 14 behavior change. (#14310)
PostgreSQL 14 changed the behavior of `websearch_to_tsquery` to
improve some behaviour.

The tests were hitting those edge-cases about handling of hanging double
quotes. This fixes the tests to take into account the PostgreSQL version.
-rw-r--r--changelog.d/14310.feature1
-rw-r--r--synapse/storage/databases/main/search.py5
-rw-r--r--tests/storage/test_room_search.py16
3 files changed, 15 insertions, 7 deletions
diff --git a/changelog.d/14310.feature b/changelog.d/14310.feature
new file mode 100644
index 0000000000..94c8a83212
--- /dev/null
+++ b/changelog.d/14310.feature
@@ -0,0 +1 @@
+Allow use of postgres and sqllite full-text search operators in search queries.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index a89fc54c2c..594b935614 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -824,9 +824,8 @@ def _tokenize_query(query: str) -> TokenList:
     in_phrase = False
     parts = deque(query.split('"'))
     for i, part in enumerate(parts):
-        # The contents inside double quotes is treated as a phrase, a trailing
-        # double quote is not implied.
-        in_phrase = bool(i % 2) and i != (len(parts) - 1)
+        # The contents inside double quotes is treated as a phrase.
+        in_phrase = bool(i % 2)
 
         # Pull out the individual words, discarding any non-word characters.
         words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 9ddc19900a..868b5bee84 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -239,7 +239,6 @@ class MessageSearchTest(HomeserverTestCase):
         ("fox -nope", (True, False)),
         ("fox -brown", (False, True)),
         ('"fox" quick', True),
-        ('"fox quick', True),
         ('"quick brown', True),
         ('" quick "', True),
         ('" nope"', False),
@@ -269,6 +268,15 @@ class MessageSearchTest(HomeserverTestCase):
         response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
         self.assertIn("event_id", response)
 
+        # The behaviour of a missing trailing double quote changed in PostgreSQL 14
+        # from ignoring the initial double quote to treating it as a phrase.
+        main_store = homeserver.get_datastores().main
+        found = False
+        if isinstance(main_store.database_engine, PostgresEngine):
+            assert main_store.database_engine._version is not None
+            found = main_store.database_engine._version < 140000
+        self.COMMON_CASES.append(('"fox quick', (found, True)))
+
     def test_tokenize_query(self) -> None:
         """Test the custom logic to tokenize a user's query."""
         cases = (
@@ -280,9 +288,9 @@ class MessageSearchTest(HomeserverTestCase):
             ("fox -brown", ["fox", SearchToken.Not, "brown"]),
             ("- fox", [SearchToken.Not, "fox"]),
             ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
-            # No trailing double quoe.
-            ('"fox quick', ["fox", SearchToken.And, "quick"]),
-            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            # No trailing double quote.
+            ('"fox quick', [Phrase(["fox", "quick"])]),
+            ('"-fox quick', [Phrase(["-fox", "quick"])]),
             ('" quick "', [Phrase(["quick"])]),
             (
                 'q"uick brow"n',