diff options
author | Brendan Abolivier <babolivier@matrix.org> | 2022-12-12 13:21:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-12 13:21:17 +0100 |
commit | 2a3cd59dd06411a79fb7500970db1b98f0d87695 (patch) | |
tree | 074cfcb2203b2bea4612c790719dcec67d86631e /tests | |
parent | Bump phonenumbers from 8.13.1 to 8.13.2 (#14660) (diff) | |
download | synapse-2a3cd59dd06411a79fb7500970db1b98f0d87695.tar.xz |
Add optional ICU support for user search (#14464)
Fixes #13655. This change uses ICU (International Components for Unicode) to improve boundary detection in user search. This change also adds a new dependency on libicu-dev and pkg-config for the Debian packages, which are available in all supported distros.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/storage/test_user_directory.py | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 88c7d5fec0..3ba896ecf3 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re from typing import Any, Dict, Set, Tuple from unittest import mock from unittest.mock import Mock, patch @@ -30,6 +31,12 @@ from synapse.util import Clock from tests.test_utils.event_injection import inject_member_event from tests.unittest import HomeserverTestCase, override_config +try: + import icu +except ImportError: + icu = None # type: ignore + + ALICE = "@alice:a" BOB = "@bob:b" BOBBY = "@bobby:a" @@ -467,3 +474,39 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): r["results"][0], {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + + +class UserDirectoryICUTestCase(HomeserverTestCase): + if not icu: + skip = "Requires PyICU" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.user_dir_helper = GetUserDirectoryTables(self.store) + + def test_icu_word_boundary(self) -> None: + """Tests that we correctly detect word boundaries when ICU (International + Components for Unicode) support is available. + """ + + display_name = "Gáo" + + # This word is not broken down correctly by Python's regular expressions, + # likely because á is actually a lowercase a followed by a U+0301 combining + # acute accent. This is specifically something that ICU support fixes. 
+ matches = re.findall(r"([\w\-]+)", display_name, re.UNICODE) + self.assertEqual(len(matches), 2) + + self.get_success( + self.store.update_profile_in_user_dir(ALICE, display_name, None) + ) + self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE,))) + + # Check that searching for this user yields the correct result. + r = self.get_success(self.store.search_user_dir(BOB, display_name, 10)) + self.assertFalse(r["limited"]) + self.assertEqual(len(r["results"]), 1) + self.assertDictEqual( + r["results"][0], + {"user_id": ALICE, "display_name": display_name, "avatar_url": None}, + ) |