From 3ac412b4e2f8c5ba11dc962b8a9d871c1efdce9b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 9 Dec 2022 12:36:32 -0500
Subject: Require types in tests.storage. (#14646)

Adds the missing type hints to the `tests.storage` package
and disallows untyped definitions in it.
---
 synapse/storage/databases/main/end_to_end_keys.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 643c47d608..4c691642e2 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -140,7 +140,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cancellable
     async def get_e2e_device_keys_for_cs_api(
         self,
-        query_list: List[Tuple[str, Optional[str]]],
+        query_list: Collection[Tuple[str, Optional[str]]],
         include_displaynames: bool = True,
     ) -> Dict[str, Dict[str, JsonDict]]:
         """Fetch a list of device keys, formatted suitably for the C/S API.
-- 
cgit 1.5.1


From 373c485d8c7f39206bac60c6ef313b4a1978bbc0 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 9 Dec 2022 23:02:11 +0000
Subject: Handle half-created indices in receipts index background update
 (#14650)

When Synapse is terminated while running the background update to create
the `receipts_graph` or `receipts_linearized` indexes, the indexes may
be successfully created (or marked as invalid on postgres) while the
background update remains unfinished. When Synapse next starts up, the
background update will fail because the index already exists, or exists
but is invalid on postgres.

Use the existing code to create indices in background updates, since it
handles these edge cases.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14650.bugfix                   |  2 ++
 synapse/storage/background_updates.py      | 55 +++++++++++++++++++++++++-----
 synapse/storage/databases/main/receipts.py | 51 +++++++--------------------
 3 files changed, 60 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/14650.bugfix

(limited to 'synapse/storage')

diff --git a/changelog.d/14650.bugfix b/changelog.d/14650.bugfix
new file mode 100644
index 0000000000..5e18641bf7
--- /dev/null
+++ b/changelog.d/14650.bugfix
@@ -0,0 +1,2 @@
+Fix a bug introduced in Synapse 1.72.0 where the background updates to add non-thread unique indexes on receipts would fail if they were previously interrupted.
+
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 2056ecb2c3..a99aea8926 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -544,6 +544,48 @@ class BackgroundUpdater:
                 The named index will be dropped upon completion of the new index.
         """
 
+        async def updater(progress: JsonDict, batch_size: int) -> int:
+            await self.create_index_in_background(
+                index_name=index_name,
+                table=table,
+                columns=columns,
+                where_clause=where_clause,
+                unique=unique,
+                psql_only=psql_only,
+                replaces_index=replaces_index,
+            )
+            await self._end_background_update(update_name)
+            return 1
+
+        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+            updater, oneshot=True
+        )
+
+    async def create_index_in_background(
+        self,
+        index_name: str,
+        table: str,
+        columns: Iterable[str],
+        where_clause: Optional[str] = None,
+        unique: bool = False,
+        psql_only: bool = False,
+        replaces_index: Optional[str] = None,
+    ) -> None:
+        """Add an index in the background.
+
+        Args:
+            update_name: update_name to register for
+            index_name: name of index to add
+            table: table to add index to
+            columns: columns/expressions to include in index
+            where_clause: A WHERE clause to specify a partial unique index.
+            unique: true to make a UNIQUE index
+            psql_only: true to only create this index on psql databases (useful
+                for virtual sqlite tables)
+            replaces_index: The name of an index that this index replaces.
+                The named index will be dropped upon completion of the new index.
+        """
+
         def create_index_psql(conn: Connection) -> None:
             conn.rollback()
             # postgres insists on autocommit for the index
@@ -618,16 +660,11 @@ class BackgroundUpdater:
         else:
             runner = create_index_sqlite
 
-        async def updater(progress: JsonDict, batch_size: int) -> int:
-            if runner is not None:
-                logger.info("Adding index %s to %s", index_name, table)
-                await self.db_pool.runWithConnection(runner)
-            await self._end_background_update(update_name)
-            return 1
+        if runner is None:
+            return
 
-        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
-            updater, oneshot=True
-        )
+        logger.info("Adding index %s to %s", index_name, table)
+        await self.db_pool.runWithConnection(runner)
 
     async def _end_background_update(self, update_name: str) -> None:
         """Removes a completed background update task from the queue.
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index a580e4bdda..e06725f69c 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -924,39 +924,6 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
 
         return batch_size
 
-    async def _create_receipts_index(self, index_name: str, table: str) -> None:
-        """Adds a unique index on `(room_id, receipt_type, user_id)` to the given
-        receipts table, for non-thread receipts."""
-
-        def _create_index(conn: LoggingDatabaseConnection) -> None:
-            conn.rollback()
-
-            # we have to set autocommit, because postgres refuses to
-            # CREATE INDEX CONCURRENTLY without it.
-            if isinstance(self.database_engine, PostgresEngine):
-                conn.set_session(autocommit=True)
-
-            try:
-                c = conn.cursor()
-
-                # Now that the duplicates are gone, we can create the index.
-                concurrently = (
-                    "CONCURRENTLY"
-                    if isinstance(self.database_engine, PostgresEngine)
-                    else ""
-                )
-                sql = f"""
-                    CREATE UNIQUE INDEX {concurrently} {index_name}
-                    ON {table}(room_id, receipt_type, user_id)
-                    WHERE thread_id IS NULL
-                """
-                c.execute(sql)
-            finally:
-                if isinstance(self.database_engine, PostgresEngine):
-                    conn.set_session(autocommit=False)
-
-        await self.db_pool.runWithConnection(_create_index)
-
     async def _background_receipts_linearized_unique_index(
         self, progress: dict, batch_size: int
     ) -> int:
@@ -999,9 +966,12 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             _remote_duplicate_receipts_txn,
         )
 
-        await self._create_receipts_index(
-            "receipts_linearized_unique_index",
-            "receipts_linearized",
+        await self.db_pool.updates.create_index_in_background(
+            index_name="receipts_linearized_unique_index",
+            table="receipts_linearized",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
         )
 
         await self.db_pool.updates._end_background_update(
@@ -1050,9 +1020,12 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             _remote_duplicate_receipts_txn,
         )
 
-        await self._create_receipts_index(
-            "receipts_graph_unique_index",
-            "receipts_graph",
+        await self.db_pool.updates.create_index_in_background(
+            index_name="receipts_graph_unique_index",
+            table="receipts_graph",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
         )
 
         await self.db_pool.updates._end_background_update(
-- 
cgit 1.5.1


From 2a3cd59dd06411a79fb7500970db1b98f0d87695 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 12 Dec 2022 13:21:17 +0100
Subject: Add optional ICU support for user search (#14464)

Fixes #13655

This change uses ICU (International Components for Unicode) to improve boundary detection in user search.

This change also adds new build dependencies on libicu-dev and pkg-config for the Debian packages; both are available in all supported distros.
---
 changelog.d/14464.feature                        |  1 +
 debian/changelog                                 |  7 +++
 debian/control                                   |  2 +
 docker/Dockerfile                                |  2 +
 docker/Dockerfile-dhvirtualenv                   |  2 +
 poetry.lock                                      | 16 +++++-
 pyproject.toml                                   |  7 +++
 stubs/icu.pyi                                    | 25 +++++++++
 synapse/storage/databases/main/user_directory.py | 67 ++++++++++++++++++++++--
 tests/storage/test_user_directory.py             | 43 +++++++++++++++
 10 files changed, 166 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14464.feature
 create mode 100644 stubs/icu.pyi

(limited to 'synapse/storage')

diff --git a/changelog.d/14464.feature b/changelog.d/14464.feature
new file mode 100644
index 0000000000..688ea32117
--- /dev/null
+++ b/changelog.d/14464.feature
@@ -0,0 +1 @@
+Improve user search for international display names.
diff --git a/debian/changelog b/debian/changelog
index 163b7210bf..5d3c4f7d6b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+matrix-synapse-py3 (1.74.0~rc1) UNRELEASED; urgency=medium
+
+  * New dependency on libicu-dev to provide improved results for user
+    search.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 06 Dec 2022 15:28:10 +0000
+
 matrix-synapse-py3 (1.73.0) stable; urgency=medium
 
   * New Synapse release 1.73.0.
diff --git a/debian/control b/debian/control
index 86f5a66d02..bc628cec08 100644
--- a/debian/control
+++ b/debian/control
@@ -8,6 +8,8 @@ Build-Depends:
  dh-virtualenv (>= 1.1),
  libsystemd-dev,
  libpq-dev,
+ libicu-dev,
+ pkg-config,
  lsb-release,
  python3-dev,
  python3,
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 185d5bc3d4..7e5123210a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -97,6 +97,8 @@ RUN \
     zlib1g-dev \
     git \
     curl \
+    libicu-dev \
+    pkg-config \
     && rm -rf /var/lib/apt/lists/*
 
 
diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv
index 73165f6f85..f3b5b00ce6 100644
--- a/docker/Dockerfile-dhvirtualenv
+++ b/docker/Dockerfile-dhvirtualenv
@@ -84,6 +84,8 @@ RUN apt-get update -qq -o Acquire::Languages=none \
         python3-venv \
         sqlite3 \
         libpq-dev \
+        libicu-dev \
+        pkg-config \
         xmlsec1
 
 # Install rust and ensure it's in the PATH
diff --git a/poetry.lock b/poetry.lock
index cac22e2ef0..ccda8a23fb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -837,6 +837,14 @@ category = "dev"
 optional = false
 python-versions = ">=3.5"
 
+[[package]]
+name = "pyicu"
+version = "2.10.2"
+description = "Python extension wrapping the ICU C++ API"
+category = "main"
+optional = true
+python-versions = "*"
+
 [[package]]
 name = "pyjwt"
 version = "2.4.0"
@@ -1622,7 +1630,7 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"]
 test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"]
 
 [extras]
-all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler"]
+all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler", "pyicu"]
 cache-memory = ["Pympler"]
 jwt = ["authlib"]
 matrix-synapse-ldap3 = ["matrix-synapse-ldap3"]
@@ -1635,11 +1643,12 @@ sentry = ["sentry-sdk"]
 systemd = ["systemd-python"]
 test = ["parameterized", "idna"]
 url-preview = ["lxml"]
+user-search = ["pyicu"]
 
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "8c44ceeb9df5c3ab43040400e0a6b895de49417e61293a1ba027640b34f03263"
+content-hash = "f20007013f33bc35a01e412c48adc62a936030f3074e06286674c5ad7f44d300"
 
 [metadata.files]
 attrs = [
@@ -2427,6 +2436,9 @@ pygments = [
     {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"},
     {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"},
 ]
+pyicu = [
+    {file = "PyICU-2.10.2.tar.gz", hash = "sha256:0c3309eea7fab6857507ace62403515b60fe096cbfb4f90d14f55ff75c5441c1"},
+]
 pyjwt = [
     {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"},
     {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
diff --git a/pyproject.toml b/pyproject.toml
index df59fa0562..bb383683cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -208,6 +208,7 @@ hiredis = { version = "*", optional = true }
 Pympler = { version = "*", optional = true }
 parameterized = { version = ">=0.7.4", optional = true }
 idna = { version = ">=2.5", optional = true }
+pyicu = { version = ">=2.10.2", optional = true }
 
 [tool.poetry.extras]
 # NB: Packages that should be part of `pip install matrix-synapse[all]` need to be specified
@@ -230,6 +231,10 @@ redis = ["txredisapi", "hiredis"]
 # Required to use experimental `caches.track_memory_usage` config option.
 cache-memory = ["pympler"]
 test = ["parameterized", "idna"]
+# Allows for better search for international characters in the user directory. This
+# requires libicu's development headers installed on the system (e.g. libicu-dev on
+# Debian-based distributions).
+user-search = ["pyicu"]
 
 # The duplication here is awful. I hate hate hate hate hate it. However, for now I want
 # to ensure you can still `pip install matrix-synapse[all]` like today. Two motivations:
@@ -261,6 +266,8 @@ all = [
     "txredisapi", "hiredis",
     # cache-memory
     "pympler",
+    # improved user search
+    "pyicu",
     # omitted:
     #   - test: it's useful to have this separate from dev deps in the olddeps job
     #   - systemd: this is a system-based requirement
diff --git a/stubs/icu.pyi b/stubs/icu.pyi
new file mode 100644
index 0000000000..efeda7938a
--- /dev/null
+++ b/stubs/icu.pyi
@@ -0,0 +1,25 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stub for PyICU.
+
+class Locale:
+    @staticmethod
+    def getDefault() -> Locale: ...
+
+class BreakIterator:
+    @staticmethod
+    def createWordInstance(locale: Locale) -> BreakIterator: ...
+    def setText(self, text: str) -> None: ...
+    def nextBoundary(self) -> int: ...
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index af9952f513..14ef5b040d 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -26,6 +26,14 @@ from typing import (
     cast,
 )
 
+try:
+    # Figure out if ICU support is available for searching users.
+    import icu
+
+    USE_ICU = True
+except ModuleNotFoundError:
+    USE_ICU = False
+
 from typing_extensions import TypedDict
 
 from synapse.api.errors import StoreError
@@ -900,7 +908,7 @@ def _parse_query_sqlite(search_term: str) -> str:
     """
 
     # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+    results = _parse_words(search_term)
     return " & ".join("(%s* OR %s)" % (result, result) for result in results)
 
 
@@ -910,12 +918,63 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
     We use this so that we can add prefix matching, which isn't something
     that is supported by default.
     """
-
-    # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+    results = _parse_words(search_term)
 
     both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
     exact = " & ".join("%s" % (result,) for result in results)
     prefix = " & ".join("%s:*" % (result,) for result in results)
 
     return both, exact, prefix
+
+
+def _parse_words(search_term: str) -> List[str]:
+    """Split the provided search string into a list of its words.
+
+    If support for ICU (International Components for Unicode) is available, use it.
+    Otherwise, fall back to using a regex to detect word boundaries. This latter
+    solution works well enough for most Latin-based languages, but doesn't work as well
+    with other languages.
+
+    Args:
+        search_term: The search string.
+
+    Returns:
+        A list of the words in the search string.
+    """
+    if USE_ICU:
+        return _parse_words_with_icu(search_term)
+
+    return re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+
+
+def _parse_words_with_icu(search_term: str) -> List[str]:
+    """Break down the provided search string into its individual words using ICU
+    (International Components for Unicode).
+
+    Args:
+        search_term: The search string.
+
+    Returns:
+        A list of the words in the search string.
+    """
+    results = []
+    breaker = icu.BreakIterator.createWordInstance(icu.Locale.getDefault())
+    breaker.setText(search_term)
+    i = 0
+    while True:
+        j = breaker.nextBoundary()
+        if j < 0:
+            break
+
+        result = search_term[i:j]
+
+        # libicu considers spaces and punctuation between words as words, but we don't
+        # want to include those in results as they would result in syntax errors in SQL
+        # queries (e.g. "foo bar" would result in the search query including "foo &  &
+        # bar").
+        if len(re.findall(r"([\w\-]+)", result, re.UNICODE)):
+            results.append(result)
+
+        i = j
+
+    return results
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 88c7d5fec0..3ba896ecf3 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
 from typing import Any, Dict, Set, Tuple
 from unittest import mock
 from unittest.mock import Mock, patch
@@ -30,6 +31,12 @@ from synapse.util import Clock
 from tests.test_utils.event_injection import inject_member_event
 from tests.unittest import HomeserverTestCase, override_config
 
+try:
+    import icu
+except ImportError:
+    icu = None  # type: ignore
+
+
 ALICE = "@alice:a"
 BOB = "@bob:b"
 BOBBY = "@bobby:a"
@@ -467,3 +474,39 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
             r["results"][0],
             {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
         )
+
+
+class UserDirectoryICUTestCase(HomeserverTestCase):
+    if not icu:
+        skip = "Requires PyICU"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.user_dir_helper = GetUserDirectoryTables(self.store)
+
+    def test_icu_word_boundary(self) -> None:
+        """Tests that we correctly detect word boundaries when ICU (International
+        Components for Unicode) support is available.
+        """
+
+        display_name = "Gáo"
+
+        # This word is not broken down correctly by Python's regular expressions,
+        # likely because á is actually a lowercase a followed by a U+0301 combining
+        # acute accent. This is specifically something that ICU support fixes.
+        matches = re.findall(r"([\w\-]+)", display_name, re.UNICODE)
+        self.assertEqual(len(matches), 2)
+
+        self.get_success(
+            self.store.update_profile_in_user_dir(ALICE, display_name, None)
+        )
+        self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE,)))
+
+        # Check that searching for this user yields the correct result.
+        r = self.get_success(self.store.search_user_dir(BOB, display_name, 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(len(r["results"]), 1)
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": ALICE, "display_name": display_name, "avatar_url": None},
+        )
-- 
cgit 1.5.1


From 74b89c27613a34ec9b291ad3066db7ce0adff1db Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 12 Dec 2022 13:55:23 +0000
Subject: Revert the deletion of stale devices due to performance issues.
 (#14662)

---
 changelog.d/14595.misc                    |  1 -
 changelog.d/14649.misc                    |  1 -
 changelog.d/14662.removal                 |  1 +
 synapse/handlers/device.py                | 33 +-----------
 synapse/storage/databases/main/devices.py | 84 +------------------------------
 tests/handlers/test_device.py             | 33 +-----------
 tests/storage/test_client_ips.py          |  4 +-
 7 files changed, 5 insertions(+), 152 deletions(-)
 delete mode 100644 changelog.d/14595.misc
 delete mode 100644 changelog.d/14649.misc
 create mode 100644 changelog.d/14662.removal

(limited to 'synapse/storage')

diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/14595.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/changelog.d/14649.misc b/changelog.d/14649.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/14649.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/changelog.d/14662.removal b/changelog.d/14662.removal
new file mode 100644
index 0000000000..19a387bbb4
--- /dev/null
+++ b/changelog.d/14662.removal
@@ -0,0 +1 @@
+(remove from changelog: unreleased) Revert the deletion of stale devices due to performance issues.
\ No newline at end of file
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index c935c7be90..d4750a32e6 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -52,7 +52,6 @@ from synapse.util import stringutils
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
-from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import measure_func
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -422,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         self._check_device_name_length(initial_device_display_name)
 
-        # Prune the user's device list if they already have a lot of devices.
-        await self._prune_too_many_devices(user_id)
-
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -456,33 +452,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
-    async def _prune_too_many_devices(self, user_id: str) -> None:
-        """Delete any excess old devices this user may have."""
-        device_ids = await self.store.check_too_many_devices_for_user(user_id, 100)
-        if not device_ids:
-            return
-
-        logger.info("Pruning %d old devices for user %s", len(device_ids), user_id)
-
-        # We don't want to block and try and delete tonnes of devices at once,
-        # so we cap the number of devices we delete synchronously.
-        first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
-        await self.delete_devices(user_id, first_batch)
-
-        if not remaining_device_ids:
-            return
-
-        # Now spawn a background loop that deletes the rest.
-        async def _prune_too_many_devices_loop() -> None:
-            for batch in batch_iter(remaining_device_ids, 10):
-                await self.delete_devices(user_id, batch)
-
-                await self.clock.sleep(1)
-
-        run_as_background_process(
-            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
-        )
-
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -512,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 95d4c0622d..a5bb4d404e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,77 +1569,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
-    async def check_too_many_devices_for_user(
-        self, user_id: str, limit: int
-    ) -> List[str]:
-        """Check if the user has a lot of devices, and if so return the set of
-        devices we can prune.
-
-        This does *not* return hidden devices or devices with E2E keys.
-
-        Returns at most `limit` number of devices, ordered by last seen.
-        """
-
-        num_devices = await self.db_pool.simple_select_one_onecol(
-            table="devices",
-            keyvalues={"user_id": user_id, "hidden": False},
-            retcol="COALESCE(COUNT(*), 0)",
-            desc="count_devices",
-        )
-
-        # We let users have up to ten devices without pruning.
-        if num_devices <= 10:
-            return []
-
-        # We prune everything older than N days.
-        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
-
-        if num_devices > 50:
-            # If the user has more than 50 devices, then we chose a last seen
-            # that ensures we keep at most 50 devices.
-            sql = """
-                SELECT last_seen FROM devices
-                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-                WHERE
-                    user_id = ?
-                    AND NOT hidden
-                    AND last_seen IS NOT NULL
-                    AND key_json IS NULL
-                ORDER BY last_seen DESC
-                LIMIT 1
-                OFFSET 50
-            """
-
-            rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
-            )
-            if rows:
-                max_last_seen = max(rows[0][0], max_last_seen)
-
-        # Now fetch the devices to delete.
-        sql = """
-            SELECT device_id FROM devices
-            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-            WHERE
-                user_id = ?
-                AND NOT hidden
-                AND last_seen < ?
-                AND key_json IS NULL
-            ORDER BY last_seen
-            LIMIT ?
-        """
-
-        def check_too_many_devices_for_user_txn(
-            txn: LoggingTransaction,
-        ) -> List[str]:
-            txn.execute(sql, (user_id, max_last_seen, limit))
-            return [device_id for device_id, in txn]
-
-        return await self.db_pool.runInteraction(
-            "check_too_many_devices_for_user",
-            check_too_many_devices_for_user_txn,
-        )
-
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1698,7 +1627,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
-                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1744,15 +1672,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    @cached(max_entries=0)
-    async def delete_device(self, user_id: str, device_id: str) -> None:
-        raise NotImplementedError()
-
-    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
-    # so we use a cache so that we deduplicate in flight requests to delete
-    # devices.
-    @cachedList(cached_method_name="delete_device", list_name="device_ids")
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Deletes several devices.
 
         Args:
@@ -1789,8 +1709,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
-        return {}
-
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index e51cac9b33..ce7525e29c 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -20,8 +20,6 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
-from synapse.rest import admin
-from synapse.rest.client import account, login
 from synapse.server import HomeServer
 from synapse.util import Clock
 
@@ -32,12 +30,6 @@ user2 = "@theresa:bbb"
 
 
 class DeviceTestCase(unittest.HomeserverTestCase):
-    servlets = [
-        login.register_servlets,
-        admin.register_servlets,
-        account.register_servlets,
-    ]
-
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
         handler = hs.get_device_handler()
@@ -123,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": 1000000,
+                "last_seen_ts": None,
             },
             device_map["xyz"],
         )
@@ -237,29 +229,6 @@ class DeviceTestCase(unittest.HomeserverTestCase):
             NotFoundError,
         )
 
-    def test_login_delete_old_devices(self) -> None:
-        """Delete old devices if the user already has too many."""
-
-        user_id = self.register_user("user", "pass")
-
-        # Create a bunch of devices
-        for _ in range(50):
-            self.login("user", "pass")
-            self.reactor.advance(1)
-
-        # Advance the clock for ages (as we only delete old devices)
-        self.reactor.advance(60 * 60 * 24 * 300)
-
-        # Log in again to start the pruning
-        self.login("user", "pass")
-
-        # Give the background job time to do its thing
-        self.reactor.pump([1.0] * 100)
-
-        # We should now only have the most recent device.
-        devices = self.get_success(self.handler.get_devices_by_user(user_id))
-        self.assertEqual(len(devices), 1)
-
     def _record_users(self) -> None:
         # check this works for both devices which have a recorded client_ip,
         # and those which don't.
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 81e4e596e4..7f7f4ef892 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -170,8 +170,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        last_seen = self.clock.time_msec()
-
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -192,7 +190,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": last_seen,
+                        "last_seen": None,
                     },
                 ],
             )
-- 
cgit 1.5.1