summary refs log tree commit diff
diff options
context:
space:
mode:
authorDavid Robertson <davidr@element.io>2021-10-04 12:45:51 +0100
committerGitHub <noreply@github.com>2021-10-04 11:45:51 +0000
commitf7b034a24bd5e64f05934453fe7b072894e124db (patch)
tree5be03263da178ddf388fa69faf4ae8dd954cfeba
parentUse direct references for configuration variables (part 7). (#10959) (diff)
downloadsynapse-f7b034a24bd5e64f05934453fe7b072894e124db.tar.xz
Consistently exclude from user_directory (#10960)
* Introduce `should_include_local_users_in_dir`

We exclude three kinds of local users from the user_directory tables. At
present we don't consistently exclude all three in the same places. This
commit introduces a new function to gather those exclusion conditions
together. Because we have to handle local and remote users in different
ways, I've made that function only consider the case of remote users.
It's the caller's responsibility to make the local versus remote
distinction clear and correct.

A test fixup is required. The test now hits a path which makes db
queries against the users table. The expected rows were missing, because
we were using a dummy user that hadn't actually been registered.

We also add new test cases to covert the exclusion logic.

----

By my reading this makes these changes:

* When an app service user registers or changes their profile, they will
  _not_ be added to the user directory. (Previously only support and
  deactivated users were excluded). This is consistent with the logic that
  rebuilds the user directory. See also [the discussion
  here](https://github.com/matrix-org/synapse/pull/10914#discussion_r716859548).
* When rebuilding the directory, exclude support and disabled users from
  room sharing tables. Previously only appservice users were excluded.
* Exclude all three categories of local users when rebuilding the
  directory. Previously `_populate_user_directory_process_users` didn't do
  any exclusion.

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
-rw-r--r--changelog.d/10960.bugfix1
-rw-r--r--synapse/handlers/user_directory.py27
-rw-r--r--synapse/storage/databases/main/user_directory.py46
-rw-r--r--tests/handlers/test_user_directory.py200
-rw-r--r--tests/rest/client/test_login.py17
-rw-r--r--tests/storage/test_user_directory.py146
-rw-r--r--tests/unittest.py29
7 files changed, 409 insertions, 57 deletions
diff --git a/changelog.d/10960.bugfix b/changelog.d/10960.bugfix
new file mode 100644
index 0000000000..b4f1c228ea
--- /dev/null
+++ b/changelog.d/10960.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where rebuilding the user directory wouldn't exclude support and disabled users.
\ No newline at end of file
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index f4430ce3c9..18d8c8744e 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -132,12 +132,7 @@ class UserDirectoryHandler(StateDeltasHandler):
         # FIXME(#3714): We should probably do this in the same worker as all
         # the other changes.
 
-        # Support users are for diagnostics and should not appear in the user directory.
-        is_support = await self.store.is_support_user(user_id)
-        # When change profile information of deactivated user it should not appear in the user directory.
-        is_deactivated = await self.store.get_user_deactivated_status(user_id)
-
-        if not (is_support or is_deactivated):
+        if await self.store.should_include_local_user_in_dir(user_id):
             await self.store.update_profile_in_user_dir(
                 user_id, profile.display_name, profile.avatar_url
             )
@@ -229,8 +224,10 @@ class UserDirectoryHandler(StateDeltasHandler):
                     else:
                         logger.debug("Server is still in room: %r", room_id)
 
-                is_support = await self.store.is_support_user(state_key)
-                if not is_support:
+                include_in_dir = not self.is_mine_id(
+                    state_key
+                ) or await self.store.should_include_local_user_in_dir(state_key)
+                if include_in_dir:
                     if change is MatchChange.no_change:
                         # Handle any profile changes
                         await self._handle_profile_change(
@@ -356,13 +353,7 @@ class UserDirectoryHandler(StateDeltasHandler):
 
             # First, if they're our user then we need to update for every user
             if self.is_mine_id(user_id):
-
-                is_appservice = self.store.get_if_app_services_interested_in_user(
-                    user_id
-                )
-
-                # We don't care about appservice users.
-                if not is_appservice:
+                if await self.store.should_include_local_user_in_dir(user_id):
                     for other_user_id in other_users_in_room:
                         if user_id == other_user_id:
                             continue
@@ -374,10 +365,10 @@ class UserDirectoryHandler(StateDeltasHandler):
                 if user_id == other_user_id:
                     continue
 
-                is_appservice = self.store.get_if_app_services_interested_in_user(
+                include_other_user = self.is_mine_id(
                     other_user_id
-                )
-                if self.is_mine_id(other_user_id) and not is_appservice:
+                ) and await self.store.should_include_local_user_in_dir(other_user_id)
+                if include_other_user:
                     to_insert.add((other_user_id, user_id))
 
             if to_insert:
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index c26e3e066f..5f538947ec 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -40,12 +40,10 @@ from synapse.util.caches.descriptors import cached
 
 logger = logging.getLogger(__name__)
 
-
 TEMP_TABLE = "_temp_populate_user_directory"
 
 
 class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
-
     # How many records do we calculate before sending it to
     # add_users_who_share_private_rooms?
     SHARE_PRIVATE_WORKING_SET = 500
@@ -235,6 +233,13 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 )
 
                 users_with_profile = await self.get_users_in_room_with_profiles(room_id)
+                # Throw away users excluded from the directory.
+                users_with_profile = {
+                    user_id: profile
+                    for user_id, profile in users_with_profile.items()
+                    if not self.hs.is_mine_id(user_id)
+                    or await self.should_include_local_user_in_dir(user_id)
+                }
 
                 # Update each user in the user directory.
                 for user_id, profile in users_with_profile.items():
@@ -246,9 +251,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
                 if is_public:
                     for user_id in users_with_profile:
-                        if self.get_if_app_services_interested_in_user(user_id):
-                            continue
-
                         to_insert.add(user_id)
 
                     if to_insert:
@@ -256,12 +258,12 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                         to_insert.clear()
                 else:
                     for user_id in users_with_profile:
+                        # We want the set of pairs (L, M) where L and M are
+                        # in `users_with_profile` and L is local.
+                        # Do so by looking for the local user L first.
                         if not self.hs.is_mine_id(user_id):
                             continue
 
-                        if self.get_if_app_services_interested_in_user(user_id):
-                            continue
-
                         for other_user_id in users_with_profile:
                             if user_id == other_user_id:
                                 continue
@@ -349,10 +351,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         )
 
         for user_id in users_to_work_on:
-            profile = await self.get_profileinfo(get_localpart_from_id(user_id))
-            await self.update_profile_in_user_dir(
-                user_id, profile.display_name, profile.avatar_url
-            )
+            if await self.should_include_local_user_in_dir(user_id):
+                profile = await self.get_profileinfo(get_localpart_from_id(user_id))
+                await self.update_profile_in_user_dir(
+                    user_id, profile.display_name, profile.avatar_url
+                )
 
             # We've finished processing a user. Delete it from the table.
             await self.db_pool.simple_delete_one(
@@ -369,6 +372,24 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
         return len(users_to_work_on)
 
+    async def should_include_local_user_in_dir(self, user: str) -> bool:
+        """Certain classes of local user are omitted from the user directory.
+        Is this user one of them?
+        """
+        # App service users aren't usually contactable, so exclude them.
+        if self.get_if_app_services_interested_in_user(user):
+            # TODO we might want to make this configurable for each app service
+            return False
+
+        # Support users are for diagnostics and should not appear in the user directory.
+        if await self.is_support_user(user):
+            return False
+
+        # Deactivated users aren't contactable, so should not appear in the user directory.
+        if await self.get_user_deactivated_status(user):
+            return False
+        return True
+
     async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
         """Check if the room is either world_readable or publically joinable"""
 
@@ -537,7 +558,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
 
 class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
-
     # How many records do we calculate before sending it to
     # add_users_who_share_private_rooms?
     SHARE_PRIVATE_WORKING_SET = 500
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 2988befb21..b3c3af113b 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Tuple
 from unittest.mock import Mock, patch
 from urllib.parse import quote
 
@@ -20,7 +21,8 @@ from twisted.test.proto_helpers import MemoryReactor
 import synapse.rest.admin
 from synapse.api.constants import UserTypes
 from synapse.api.room_versions import RoomVersion, RoomVersions
-from synapse.rest.client import login, room, user_directory
+from synapse.appservice import ApplicationService
+from synapse.rest.client import login, register, room, user_directory
 from synapse.server import HomeServer
 from synapse.storage.roommember import ProfileInfo
 from synapse.types import create_requester
@@ -28,6 +30,7 @@ from synapse.util import Clock
 
 from tests import unittest
 from tests.storage.test_user_directory import GetUserDirectoryTables
+from tests.test_utils.event_injection import inject_member_event
 from tests.unittest import override_config
 
 
@@ -47,13 +50,29 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
     servlets = [
         login.register_servlets,
         synapse.rest.admin.register_servlets,
+        register.register_servlets,
         room.register_servlets,
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         config = self.default_config()
         config["update_user_directory"] = True
-        return self.setup_test_homeserver(config=config)
+
+        self.appservice = ApplicationService(
+            token="i_am_an_app_service",
+            hostname="test",
+            id="1234",
+            namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
+            sender="@as:test",
+        )
+
+        mock_load_appservices = Mock(return_value=[self.appservice])
+        with patch(
+            "synapse.storage.databases.main.appservice.load_appservices",
+            mock_load_appservices,
+        ):
+            hs = self.setup_test_homeserver(config=config)
+        return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastore()
@@ -62,6 +81,137 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         self.event_creation_handler = self.hs.get_event_creation_handler()
         self.user_dir_helper = GetUserDirectoryTables(self.store)
 
+    def test_normal_user_pair(self) -> None:
+        """Sanity check that the room-sharing tables are updated correctly."""
+        alice = self.register_user("alice", "pass")
+        alice_token = self.login(alice, "pass")
+        bob = self.register_user("bob", "pass")
+        bob_token = self.login(bob, "pass")
+
+        public = self.helper.create_room_as(
+            alice,
+            is_public=True,
+            extra_content={"visibility": "public"},
+            tok=alice_token,
+        )
+        private = self.helper.create_room_as(alice, is_public=False, tok=alice_token)
+        self.helper.invite(private, alice, bob, tok=alice_token)
+        self.helper.join(public, bob, tok=bob_token)
+        self.helper.join(private, bob, tok=bob_token)
+
+        # Alice also makes a second public room but no-one else joins
+        public2 = self.helper.create_room_as(
+            alice,
+            is_public=True,
+            extra_content={"visibility": "public"},
+            tok=alice_token,
+        )
+
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        in_public = self.get_success(self.user_dir_helper.get_users_in_public_rooms())
+        in_private = self.get_success(
+            self.user_dir_helper.get_users_who_share_private_rooms()
+        )
+
+        self.assertEqual(users, {alice, bob})
+        self.assertEqual(
+            set(in_public), {(alice, public), (bob, public), (alice, public2)}
+        )
+        self.assertEqual(
+            self.user_dir_helper._compress_shared(in_private),
+            {(alice, bob, private), (bob, alice, private)},
+        )
+
+    # The next three tests (test_population_excludes_*) all setup
+    #   - A normal user included in the user dir
+    #   - A public and private room created by that user
+    #   - A user excluded from the room dir, belonging to both rooms
+
+    # They match similar logic in storage/test_user_directory. But that tests
+    # rebuilding the directory; this tests updating it incrementally.
+
+    def test_excludes_support_user(self) -> None:
+        alice = self.register_user("alice", "pass")
+        alice_token = self.login(alice, "pass")
+        support = "@support1:test"
+        self.get_success(
+            self.store.register_user(
+                user_id=support, password_hash=None, user_type=UserTypes.SUPPORT
+            )
+        )
+
+        public, private = self._create_rooms_and_inject_memberships(
+            alice, alice_token, support
+        )
+        self._check_only_one_user_in_directory(alice, public)
+
+    def test_excludes_deactivated_user(self) -> None:
+        admin = self.register_user("admin", "pass", admin=True)
+        admin_token = self.login(admin, "pass")
+        user = self.register_user("naughty", "pass")
+
+        # Deactivate the user.
+        channel = self.make_request(
+            "PUT",
+            f"/_synapse/admin/v2/users/{user}",
+            access_token=admin_token,
+            content={"deactivated": True},
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["deactivated"], True)
+
+        # Join the deactivated user to rooms owned by the admin.
+        # Is this something that could actually happen outside of a test?
+        public, private = self._create_rooms_and_inject_memberships(
+            admin, admin_token, user
+        )
+        self._check_only_one_user_in_directory(admin, public)
+
+    def test_excludes_appservices_user(self) -> None:
+        # Register an AS user.
+        user = self.register_user("user", "pass")
+        token = self.login(user, "pass")
+        as_user = self.register_appservice_user("as_user_potato", self.appservice.token)
+
+        # Join the AS user to rooms owned by the normal user.
+        public, private = self._create_rooms_and_inject_memberships(
+            user, token, as_user
+        )
+        self._check_only_one_user_in_directory(user, public)
+
+    def _create_rooms_and_inject_memberships(
+        self, creator: str, token: str, joiner: str
+    ) -> Tuple[str, str]:
+        """Create a public and private room as a normal user.
+        Then get the `joiner` into those rooms.
+        """
+        # TODO: Duplicates the same-named method in UserDirectoryInitialPopulationTest.
+        public_room = self.helper.create_room_as(
+            creator,
+            is_public=True,
+            # See https://github.com/matrix-org/synapse/issues/10951
+            extra_content={"visibility": "public"},
+            tok=token,
+        )
+        private_room = self.helper.create_room_as(creator, is_public=False, tok=token)
+
+        # HACK: get the user into these rooms
+        self.get_success(inject_member_event(self.hs, public_room, joiner, "join"))
+        self.get_success(inject_member_event(self.hs, private_room, joiner, "join"))
+
+        return public_room, private_room
+
+    def _check_only_one_user_in_directory(self, user: str, public: str) -> None:
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        in_public = self.get_success(self.user_dir_helper.get_users_in_public_rooms())
+        in_private = self.get_success(
+            self.user_dir_helper.get_users_who_share_private_rooms()
+        )
+
+        self.assertEqual(users, {user})
+        self.assertEqual(set(in_public), {(user, public)})
+        self.assertEqual(in_private, [])
+
     def test_handle_local_profile_change_with_support_user(self) -> None:
         support_user_id = "@support:test"
         self.get_success(
@@ -125,6 +275,26 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         profile = self.get_success(self.store.get_user_in_directory(r_user_id))
         self.assertTrue(profile is None)
 
+    def test_handle_local_profile_change_with_appservice_user(self) -> None:
+        # create user
+        as_user_id = self.register_appservice_user(
+            "as_user_alice", self.appservice.token
+        )
+
+        # profile is not in directory
+        profile = self.get_success(self.store.get_user_in_directory(as_user_id))
+        self.assertTrue(profile is None)
+
+        # update profile
+        profile_info = ProfileInfo(avatar_url="avatar_url", display_name="4L1c3")
+        self.get_success(
+            self.handler.handle_local_profile_change(as_user_id, profile_info)
+        )
+
+        # profile is still not in directory
+        profile = self.get_success(self.store.get_user_in_directory(as_user_id))
+        self.assertTrue(profile is None)
+
     def test_handle_user_deactivated_support_user(self) -> None:
         s_user_id = "@support:test"
         self.get_success(
@@ -483,8 +653,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
 
 
 class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
-    user_id = "@test:test"
-
     servlets = [
         user_directory.register_servlets,
         room.register_servlets,
@@ -504,16 +672,21 @@ class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
     def test_disabling_room_list(self) -> None:
         self.config.userdirectory.user_directory_search_enabled = True
 
-        # First we create a room with another user so that user dir is non-empty
-        # for our user
-        self.helper.create_room_as(self.user_id)
+        # Create two users and put them in the same room.
+        u1 = self.register_user("user1", "pass")
+        u1_token = self.login(u1, "pass")
         u2 = self.register_user("user2", "pass")
-        room = self.helper.create_room_as(self.user_id)
-        self.helper.join(room, user=u2)
+        u2_token = self.login(u2, "pass")
+
+        room = self.helper.create_room_as(u1, tok=u1_token)
+        self.helper.join(room, user=u2, tok=u2_token)
 
-        # Assert user directory is not empty
+        # Each should see the other when searching the user directory.
         channel = self.make_request(
-            "POST", b"user_directory/search", b'{"search_term":"user2"}'
+            "POST",
+            b"user_directory/search",
+            b'{"search_term":"user2"}',
+            access_token=u1_token,
         )
         self.assertEquals(200, channel.code, channel.result)
         self.assertTrue(len(channel.json_body["results"]) > 0)
@@ -521,7 +694,10 @@ class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
         # Disable user directory and check search returns nothing
         self.config.userdirectory.user_directory_search_enabled = False
         channel = self.make_request(
-            "POST", b"user_directory/search", b'{"search_term":"user2"}'
+            "POST",
+            b"user_directory/search",
+            b'{"search_term":"user2"}',
+            access_token=u1_token,
         )
         self.assertEquals(200, channel.code, channel.result)
         self.assertTrue(len(channel.json_body["results"]) == 0)
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index 7fd92c94e0..a63f04bd41 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -1064,13 +1064,6 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
         register.register_servlets,
     ]
 
-    def register_as_user(self, username):
-        self.make_request(
-            b"POST",
-            "/_matrix/client/r0/register?access_token=%s" % (self.service.token,),
-            {"username": username},
-        )
-
     def make_homeserver(self, reactor, clock):
         self.hs = self.setup_test_homeserver()
 
@@ -1107,7 +1100,7 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
 
     def test_login_appservice_user(self):
         """Test that an appservice user can use /login"""
-        self.register_as_user(AS_USER)
+        self.register_appservice_user(AS_USER, self.service.token)
 
         params = {
             "type": login.LoginRestServlet.APPSERVICE_TYPE,
@@ -1121,7 +1114,7 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
 
     def test_login_appservice_user_bot(self):
         """Test that the appservice bot can use /login"""
-        self.register_as_user(AS_USER)
+        self.register_appservice_user(AS_USER, self.service.token)
 
         params = {
             "type": login.LoginRestServlet.APPSERVICE_TYPE,
@@ -1135,7 +1128,7 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
 
     def test_login_appservice_wrong_user(self):
         """Test that non-as users cannot login with the as token"""
-        self.register_as_user(AS_USER)
+        self.register_appservice_user(AS_USER, self.service.token)
 
         params = {
             "type": login.LoginRestServlet.APPSERVICE_TYPE,
@@ -1149,7 +1142,7 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
 
     def test_login_appservice_wrong_as(self):
         """Test that as users cannot login with wrong as token"""
-        self.register_as_user(AS_USER)
+        self.register_appservice_user(AS_USER, self.service.token)
 
         params = {
             "type": login.LoginRestServlet.APPSERVICE_TYPE,
@@ -1165,7 +1158,7 @@ class AppserviceLoginRestServletTestCase(unittest.HomeserverTestCase):
         """Test that users must provide a token when using the appservice
         login method
         """
-        self.register_as_user(AS_USER)
+        self.register_appservice_user(AS_USER, self.service.token)
 
         params = {
             "type": login.LoginRestServlet.APPSERVICE_TYPE,
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 74c8a8599e..6884ca9b7a 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -12,15 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from typing import Dict, List, Set, Tuple
+from unittest.mock import Mock, patch
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import UserTypes
+from synapse.appservice import ApplicationService
 from synapse.rest import admin
-from synapse.rest.client import login, room
+from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.util import Clock
 
+from tests.test_utils.event_injection import inject_member_event
 from tests.unittest import HomeserverTestCase, override_config
 
 ALICE = "@alice:a"
@@ -64,6 +68,14 @@ class GetUserDirectoryTables:
             ["user_id", "other_user_id", "room_id"],
         )
 
+    async def get_users_in_user_directory(self) -> Set[str]:
+        result = await self.store.db_pool.simple_select_list(
+            "user_directory",
+            None,
+            ["user_id"],
+        )
+        return {row["user_id"] for row in result}
+
 
 class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
     """Ensure that rebuilding the directory writes the correct data to the DB.
@@ -74,10 +86,28 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
 
     servlets = [
         login.register_servlets,
-        admin.register_servlets_for_client_rest_resource,
+        admin.register_servlets,
         room.register_servlets,
+        register.register_servlets,
     ]
 
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        self.appservice = ApplicationService(
+            token="i_am_an_app_service",
+            hostname="test",
+            id="1234",
+            namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
+            sender="@as:test",
+        )
+
+        mock_load_appservices = Mock(return_value=[self.appservice])
+        with patch(
+            "synapse.storage.databases.main.appservice.load_appservices",
+            mock_load_appservices,
+        ):
+            hs = super().make_homeserver(reactor, clock)
+        return hs
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastore()
         self.user_dir_helper = GetUserDirectoryTables(self.store)
@@ -204,6 +234,118 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
             {(u1, u3, private_room), (u3, u1, private_room)},
         )
 
+        # All three should have entries in the directory
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        self.assertEqual(users, {u1, u2, u3})
+
+    # The next three tests (test_population_excludes_*) all set up
+    #   - A normal user included in the user dir
+    #   - A public and private room created by that user
+    #   - A user excluded from the room dir, belonging to both rooms
+
+    # They match similar logic in handlers/test_user_directory.py But that tests
+    # updating the directory; this tests rebuilding it from scratch.
+
+    def _create_rooms_and_inject_memberships(
+        self, creator: str, token: str, joiner: str
+    ) -> Tuple[str, str]:
+        """Create a public and private room as a normal user.
+        Then get the `joiner` into those rooms.
+        """
+        public_room = self.helper.create_room_as(
+            creator,
+            is_public=True,
+            # See https://github.com/matrix-org/synapse/issues/10951
+            extra_content={"visibility": "public"},
+            tok=token,
+        )
+        private_room = self.helper.create_room_as(creator, is_public=False, tok=token)
+
+        # HACK: get the user into these rooms
+        self.get_success(inject_member_event(self.hs, public_room, joiner, "join"))
+        self.get_success(inject_member_event(self.hs, private_room, joiner, "join"))
+
+        return public_room, private_room
+
+    def _check_room_sharing_tables(
+        self, normal_user: str, public_room: str, private_room: str
+    ) -> None:
+        # After rebuilding the directory, we should only see the normal user.
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        self.assertEqual(users, {normal_user})
+        in_public_rooms = self.get_success(
+            self.user_dir_helper.get_users_in_public_rooms()
+        )
+        self.assertEqual(set(in_public_rooms), {(normal_user, public_room)})
+        in_private_rooms = self.get_success(
+            self.user_dir_helper.get_users_who_share_private_rooms()
+        )
+        self.assertEqual(in_private_rooms, [])
+
+    def test_population_excludes_support_user(self) -> None:
+        # Create a normal and support user.
+        user = self.register_user("user", "pass")
+        token = self.login(user, "pass")
+        support = "@support1:test"
+        self.get_success(
+            self.store.register_user(
+                user_id=support, password_hash=None, user_type=UserTypes.SUPPORT
+            )
+        )
+
+        # Join the support user to rooms owned by the normal user.
+        public, private = self._create_rooms_and_inject_memberships(
+            user, token, support
+        )
+
+        # Rebuild the directory.
+        self._purge_and_rebuild_user_dir()
+
+        # Check the support user is not in the directory.
+        self._check_room_sharing_tables(user, public, private)
+
+    def test_population_excludes_deactivated_user(self) -> None:
+        user = self.register_user("naughty", "pass")
+        admin = self.register_user("admin", "pass", admin=True)
+        admin_token = self.login(admin, "pass")
+
+        # Deactivate the user.
+        channel = self.make_request(
+            "PUT",
+            f"/_synapse/admin/v2/users/{user}",
+            access_token=admin_token,
+            content={"deactivated": True},
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["deactivated"], True)
+
+        # Join the deactivated user to rooms owned by the admin.
+        # Is this something that could actually happen outside of a test?
+        public, private = self._create_rooms_and_inject_memberships(
+            admin, admin_token, user
+        )
+
+        # Rebuild the user dir. The deactivated user should be missing.
+        self._purge_and_rebuild_user_dir()
+        self._check_room_sharing_tables(admin, public, private)
+
+    def test_population_excludes_appservice_user(self) -> None:
+        # Register an AS user.
+        user = self.register_user("user", "pass")
+        token = self.login(user, "pass")
+        as_user = self.register_appservice_user("as_user_potato", self.appservice.token)
+
+        # Join the AS user to rooms owned by the normal user.
+        public, private = self._create_rooms_and_inject_memberships(
+            user, token, as_user
+        )
+
+        # Rebuild the directory.
+        self._purge_and_rebuild_user_dir()
+
+        # Check the AS user is not in the directory.
+        self._check_room_sharing_tables(user, public, private)
+
 
 class UserDirectoryStoreTestCase(HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
diff --git a/tests/unittest.py b/tests/unittest.py
index 1f803564f6..ae393ee53e 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -596,6 +596,35 @@ class HomeserverTestCase(TestCase):
         user_id = channel.json_body["user_id"]
         return user_id
 
+    def register_appservice_user(
+        self,
+        username: str,
+        appservice_token: str,
+    ) -> str:
+        """Register an appservice user as an application service.
+        Requires the client-facing registration API be registered.
+
+        Args:
+            username: the user to be registered by an application service.
+                Should be a full username, i.e. ""@localpart:hostname" as opposed to just "localpart"
+            appservice_token: the acccess token for that application service.
+
+        Raises: if the request to '/register' does not return 200 OK.
+
+        Returns: the MXID of the new user.
+        """
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/register",
+            {
+                "username": username,
+                "type": "m.login.application_service",
+            },
+            access_token=appservice_token,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+        return channel.json_body["user_id"]
+
     def login(
         self,
         username,