From 4b965c862dc66c0da5d3240add70e9b5f0aa720b Mon Sep 17 00:00:00 2001 From: Jonathan de Jong Date: Wed, 14 Apr 2021 16:34:27 +0200 Subject: Remove redundant "coding: utf-8" lines (#9786) Part of #9744 Removes all redundant `# -*- coding: utf-8 -*-` lines from files, as python 3 automatically reads source code as utf-8 now. `Signed-off-by: Jonathan de Jong ` --- synapse/storage/databases/main/roommember.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/storage/databases/main/roommember.py') diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a9216ca9ae..ef5587f87a 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd # Copyright 2018 New Vector Ltd # -- cgit 1.5.1 From c571736c6ca5d1d2d9bf7cd9b717465d446ac7b3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 16 Apr 2021 18:17:18 +0100 Subject: User directory: use calculated room membership state instead (#9821) Fixes: #9797. Should help reduce CPU usage on the user directory, especially when memberships change in rooms with lots of state history. --- changelog.d/9821.misc | 1 + synapse/handlers/user_directory.py | 15 ++++++++------- synapse/storage/databases/main/roommember.py | 27 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 changelog.d/9821.misc (limited to 'synapse/storage/databases/main/roommember.py') diff --git a/changelog.d/9821.misc b/changelog.d/9821.misc new file mode 100644 index 0000000000..03b2d2ed4d --- /dev/null +++ b/changelog.d/9821.misc @@ -0,0 +1 @@ +Reduce CPU usage of the user directory by reusing existing calculated room membership. \ No newline at end of file diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 9b1e6d5c18..dacc4f3076 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -44,7 +44,6 @@ class UserDirectoryHandler(StateDeltasHandler): super().__init__(hs) self.store = hs.get_datastore() - self.state = hs.get_state_handler() self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() @@ -302,10 +301,12 @@ class UserDirectoryHandler(StateDeltasHandler): # ignore the change return - users_with_profile = await self.state.get_current_users_in_room(room_id) + other_users_in_room_with_profiles = ( + await self.store.get_users_in_room_with_profiles(room_id) + ) # Remove every user from the sharing tables for that room. - for user_id in users_with_profile.keys(): + for user_id in other_users_in_room_with_profiles.keys(): await self.store.remove_user_who_share_room(user_id, room_id) # Then, re-add them to the tables. @@ -314,7 +315,7 @@ class UserDirectoryHandler(StateDeltasHandler): # which when ran over an entire room, will result in the same values # being added multiple times. The batching upserts shouldn't make this # too bad, though. - for user_id, profile in users_with_profile.items(): + for user_id, profile in other_users_in_room_with_profiles.items(): await self._handle_new_user(room_id, user_id, profile) async def _handle_new_user( @@ -336,7 +337,7 @@ class UserDirectoryHandler(StateDeltasHandler): room_id ) # Now we update users who share rooms with users. - users_with_profile = await self.state.get_current_users_in_room(room_id) + other_users_in_room = await self.store.get_users_in_room(room_id) if is_public: await self.store.add_users_in_public_rooms(room_id, (user_id,)) @@ -352,14 +353,14 @@ class UserDirectoryHandler(StateDeltasHandler): # We don't care about appservice users. if not is_appservice: - for other_user_id in users_with_profile: + for other_user_id in other_users_in_room: if user_id == other_user_id: continue to_insert.add((user_id, other_user_id)) # Next we need to update for every local user in the room - for other_user_id in users_with_profile: + for other_user_id in other_users_in_room: if user_id == other_user_id: continue diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ef5587f87a..fd525dce65 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -173,6 +173,33 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] + @cached(max_entries=100000, iterable=True) + async def get_users_in_room_with_profiles( + self, room_id: str + ) -> Dict[str, ProfileInfo]: + """Get a mapping from user ID to profile information for all users in a given room. + + Args: + room_id: The ID of the room to retrieve the users of. + + Returns: + A mapping from user ID to ProfileInfo. + """ + + def _get_users_in_room_with_profiles(txn) -> Dict[str, ProfileInfo]: + sql = """ + SELECT user_id, display_name, avatar_url FROM room_memberships + WHERE room_id = ? AND membership = ? + """ + txn.execute(sql, (room_id, Membership.JOIN)) + + return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} + + return await self.db_pool.runInteraction( + "get_users_in_room_with_profiles", + _get_users_in_room_with_profiles, + ) + @cached(max_entries=100000) async def get_room_summary(self, room_id: str) -> Dict[str, MemberSummary]: """Get the details of a room roughly suitable for use by the room -- cgit 1.5.1 From 294c67503300b6bfa7785a5cfa55e25c1e452574 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 22 Apr 2021 16:43:50 +0100 Subject: Remove `synapse.types.Collection` (#9856) This is no longer required, since we have dropped support for Python 3.5. --- changelog.d/9856.misc | 1 + synapse/config/oidc.py | 4 ++-- synapse/events/spamcheck.py | 3 +-- synapse/federation/sender/__init__.py | 14 ++++++++++++-- synapse/handlers/appservice.py | 4 ++-- synapse/handlers/device.py | 3 +-- synapse/handlers/presence.py | 3 ++- synapse/handlers/sso.py | 3 ++- synapse/handlers/sync.py | 13 +++++++++++-- synapse/notifier.py | 9 ++------- synapse/replication/tcp/protocol.py | 3 +-- synapse/state/__init__.py | 3 ++- synapse/state/v2.py | 3 ++- synapse/storage/_base.py | 4 ++-- synapse/storage/database.py | 2 +- synapse/storage/databases/main/devices.py | 4 ++-- synapse/storage/databases/main/event_federation.py | 3 +-- synapse/storage/databases/main/events_worker.py | 13 +++++++++++-- synapse/storage/databases/main/roommember.py | 14 ++++++++++++-- synapse/storage/databases/main/search.py | 3 +-- synapse/storage/databases/main/stream.py | 4 ++-- synapse/storage/persist_events.py | 3 +-- synapse/storage/prepare_database.py | 3 +-- synapse/types.py | 14 -------------- synapse/util/caches/stream_change_cache.py | 3 +-- synapse/util/iterutils.py | 3 +-- 26 files changed, 77 insertions(+), 62 deletions(-) create mode 100644 changelog.d/9856.misc (limited to 'synapse/storage/databases/main/roommember.py') diff --git a/changelog.d/9856.misc b/changelog.d/9856.misc new file mode 100644 index 0000000000..d67e8c386a --- /dev/null +++ b/changelog.d/9856.misc @@ -0,0 +1 @@ +Remove redundant `synapse.types.Collection` type definition. diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index 72402eb81d..ea0abf5aa2 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -14,14 +14,14 @@ # limitations under the License. from collections import Counter -from typing import Iterable, List, Mapping, Optional, Tuple, Type +from typing import Collection, Iterable, List, Mapping, Optional, Tuple, Type import attr from synapse.config._util import validate_config from synapse.config.sso import SsoAttributeRequirement from synapse.python_dependencies import DependencyException, check_requirements -from synapse.types import Collection, JsonDict +from synapse.types import JsonDict from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_mxc_uri diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index c727b48c1e..7118d5f52d 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -15,12 +15,11 @@ import inspect import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import Collection from synapse.util.async_helpers import maybe_awaitable if TYPE_CHECKING: diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index b00a55324c..022bbf7dad 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -14,7 +14,17 @@ import abc import logging -from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Hashable, + Iterable, + List, + Optional, + Set, + Tuple, +) from prometheus_client import Counter @@ -31,7 +41,7 @@ from synapse.metrics import ( events_processed_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import Collection, JsonDict, ReadReceipt, RoomStreamToken +from synapse.types import JsonDict, ReadReceipt, RoomStreamToken from synapse.util.metrics import Measure if TYPE_CHECKING: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index d7bc4e23ed..177310f0be 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Union from prometheus_client import Counter @@ -33,7 +33,7 @@ from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) from synapse.storage.databases.main.directory import RoomAliasMapping -from synapse.types import Collection, JsonDict, RoomAlias, RoomStreamToken, UserID +from synapse.types import JsonDict, RoomAlias, RoomStreamToken, UserID from synapse.util.metrics import Measure if TYPE_CHECKING: diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c1d7800981..34d39e3b44 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple from synapse.api import errors from synapse.api.constants import EventTypes @@ -28,7 +28,6 @@ from synapse.api.errors import ( from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import ( - Collection, JsonDict, StreamToken, UserID, diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 598466c9bd..7fd28ffa54 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -28,6 +28,7 @@ from bisect import bisect from contextlib import contextmanager from typing import ( TYPE_CHECKING, + Collection, Dict, FrozenSet, Iterable, @@ -59,7 +60,7 @@ from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.state import StateHandler from synapse.storage.databases.main import DataStore -from synapse.types import Collection, JsonDict, UserID, get_domain_from_id +from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import Measure diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 8d00ffdc73..044ff06d84 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -18,6 +18,7 @@ from typing import ( Any, Awaitable, Callable, + Collection, Dict, Iterable, List, @@ -40,7 +41,7 @@ from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http import get_request_user_agent from synapse.http.server import respond_with_html, respond_with_redirect from synapse.http.site import SynapseRequest -from synapse.types import Collection, JsonDict, UserID, contains_invalid_mxid_characters +from synapse.types import JsonDict, UserID, contains_invalid_mxid_characters from synapse.util.async_helpers import Linearizer from synapse.util.stringutils import random_string diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index dc8ee8cd17..a9a3ee05c3 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -14,7 +14,17 @@ # limitations under the License. import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + FrozenSet, + List, + Optional, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -28,7 +38,6 @@ from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.roommember import MemberSummary from synapse.storage.state import StateFilter from synapse.types import ( - Collection, JsonDict, MutableStateMap, Requester, diff --git a/synapse/notifier.py b/synapse/notifier.py index d5ab77058d..b9531007e2 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -17,6 +17,7 @@ from collections import namedtuple from typing import ( Awaitable, Callable, + Collection, Dict, Iterable, List, @@ -42,13 +43,7 @@ from synapse.logging.opentracing import log_kv, start_active_span from synapse.logging.utils import log_function from synapse.metrics import LaterGauge from synapse.streams.config import PaginationConfig -from synapse.types import ( - Collection, - PersistedEventPosition, - RoomStreamToken, - StreamToken, - UserID, -) +from synapse.types import PersistedEventPosition, RoomStreamToken, StreamToken, UserID from synapse.util.async_helpers import ObservableDeferred, timeout_deferred from synapse.util.metrics import Measure from synapse.visibility import filter_events_for_client diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 6860576e78..6e3705364f 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -49,7 +49,7 @@ import fcntl import logging import struct from inspect import isawaitable -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Collection, List, Optional from prometheus_client import Counter from zope.interface import Interface, implementer @@ -76,7 +76,6 @@ from synapse.replication.tcp.commands import ( ServerCommand, parse_command_from_line, ) -from synapse.types import Collection from synapse.util import Clock from synapse.util.stringutils import random_string diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c7ee731154..b3bd92d37c 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -19,6 +19,7 @@ from typing import ( Any, Awaitable, Callable, + Collection, DefaultDict, Dict, FrozenSet, @@ -46,7 +47,7 @@ from synapse.logging.utils import log_function from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.roommember import ProfileInfo -from synapse.types import Collection, StateMap +from synapse.types import StateMap from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.metrics import Measure, measure_func diff --git a/synapse/state/v2.py b/synapse/state/v2.py index 32671ddbde..008644cd98 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -18,6 +18,7 @@ import logging from typing import ( Any, Callable, + Collection, Dict, Generator, Iterable, @@ -37,7 +38,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import EventBase -from synapse.types import Collection, MutableStateMap, StateMap +from synapse.types import MutableStateMap, StateMap from synapse.util import Clock logger = logging.getLogger(__name__) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 56dd3a4861..d472676acf 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -16,13 +16,13 @@ import logging import random from abc import ABCMeta -from typing import TYPE_CHECKING, Any, Iterable, Optional, Union +from typing import TYPE_CHECKING, Any, Collection, Iterable, Optional, Union from synapse.storage.database import LoggingTransaction # noqa: F401 from synapse.storage.database import make_in_list_sql_clause # noqa: F401 from synapse.storage.database import DatabasePool from synapse.storage.types import Connection -from synapse.types import Collection, StreamToken, get_domain_from_id +from synapse.types import StreamToken, get_domain_from_id from synapse.util import json_decoder if TYPE_CHECKING: diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 9a6d2b21f9..9452368bf0 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -20,6 +20,7 @@ from time import monotonic as monotonic_time from typing import ( Any, Callable, + Collection, Dict, Iterable, Iterator, @@ -48,7 +49,6 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor -from synapse.types import Collection # python 3 does not have a maximum int value MAX_TXN_ID = 2 ** 63 - 1 diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index b204875580..9be713399f 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -15,7 +15,7 @@ # limitations under the License. import abc import logging -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import Any, Collection, Dict, Iterable, List, Optional, Set, Tuple from synapse.api.errors import Codes, StoreError from synapse.logging.opentracing import ( @@ -31,7 +31,7 @@ from synapse.storage.database import ( LoggingTransaction, make_tuple_comparison_clause, ) -from synapse.types import Collection, JsonDict, get_verify_key_from_cross_signing_key +from synapse.types import JsonDict, get_verify_key_from_cross_signing_key from synapse.util import json_decoder, json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.lrucache import LruCache diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 32ce70a396..ff81d5cd17 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Dict, Iterable, List, Set, Tuple +from typing import Collection, Dict, Iterable, List, Set, Tuple from synapse.api.errors import StoreError from synapse.events import EventBase @@ -25,7 +25,6 @@ from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.types import Cursor -from synapse.types import Collection from synapse.util.caches.descriptors import cached from synapse.util.caches.lrucache import LruCache from synapse.util.iterutils import batch_iter diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 64d70785b8..2c823e09cf 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -15,7 +15,16 @@ import logging import threading from collections import namedtuple -from typing import Container, Dict, Iterable, List, Optional, Tuple, overload +from typing import ( + Collection, + Container, + Dict, + Iterable, + List, + Optional, + Tuple, + overload, +) from constantly import NamedConstant, Names from typing_extensions import Literal @@ -45,7 +54,7 @@ from synapse.storage.database import DatabasePool from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import Collection, JsonDict, get_domain_from_id +from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.descriptors import cached from synapse.util.caches.lrucache import LruCache from synapse.util.iterutils import batch_iter diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index fd525dce65..bd8513cd43 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -13,7 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + FrozenSet, + Iterable, + List, + Optional, + Set, + Tuple, +) from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase @@ -33,7 +43,7 @@ from synapse.storage.roommember import ( ProfileInfo, RoomsForUser, ) -from synapse.types import Collection, PersistedEventPosition, get_domain_from_id +from synapse.types import PersistedEventPosition, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 0276f30656..6480d5a9f5 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -15,7 +15,7 @@ import logging import re from collections import namedtuple -from typing import List, Optional, Set +from typing import Collection, List, Optional, Set from synapse.api.errors import SynapseError from synapse.events import EventBase @@ -23,7 +23,6 @@ from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_cla from synapse.storage.database import DatabasePool from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.engines import PostgresEngine, Sqlite3Engine -from synapse.types import Collection logger = logging.getLogger(__name__) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index db5ce4ea01..7581c7d3ff 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -37,7 +37,7 @@ what sort order was used: import abc import logging from collections import namedtuple -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Set, Tuple from twisted.internet import defer @@ -53,7 +53,7 @@ from synapse.storage.database import ( from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator -from synapse.types import Collection, PersistedEventPosition, RoomStreamToken +from synapse.types import PersistedEventPosition, RoomStreamToken from synapse.util.caches.descriptors import cached from synapse.util.caches.stream_change_cache import StreamChangeCache diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 87e040b014..33dc752d8f 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -17,7 +17,7 @@ import itertools import logging from collections import deque, namedtuple -from typing import Dict, Iterable, List, Optional, Set, Tuple +from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple from prometheus_client import Counter, Histogram @@ -32,7 +32,6 @@ from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( - Collection, PersistedEventPosition, RoomStreamToken, StateMap, diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 05a9355974..7a2cbee426 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -17,7 +17,7 @@ import logging import os import re from collections import Counter -from typing import Generator, Iterable, List, Optional, TextIO, Tuple +from typing import Collection, Generator, Iterable, List, Optional, TextIO, Tuple import attr from typing_extensions import Counter as CounterType @@ -27,7 +27,6 @@ from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.engines.postgres import PostgresEngine from synapse.storage.types import Cursor -from synapse.types import Collection logger = logging.getLogger(__name__) diff --git a/synapse/types.py b/synapse/types.py index 21654ae686..e19f28d543 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -15,13 +15,11 @@ import abc import re import string -import sys from collections import namedtuple from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, Mapping, MutableMapping, Optional, @@ -50,18 +48,6 @@ if TYPE_CHECKING: from synapse.appservice.api import ApplicationService from synapse.storage.databases.main import DataStore -# define a version of typing.Collection that works on python 3.5 -if sys.version_info[:3] >= (3, 6, 0): - from typing import Collection -else: - from typing import Container, Sized - - T_co = TypeVar("T_co", covariant=True) - - class Collection(Iterable[T_co], Container[T_co], Sized): # type: ignore - __slots__ = () - - # Define a state map type from type/state_key to T (usually an event ID or # event) T = TypeVar("T") diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 0469e7d120..e81e468899 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -14,11 +14,10 @@ import logging import math -from typing import Dict, FrozenSet, List, Mapping, Optional, Set, Union +from typing import Collection, Dict, FrozenSet, List, Mapping, Optional, Set, Union from sortedcontainers import SortedDict -from synapse.types import Collection from synapse.util import caches logger = logging.getLogger(__name__) diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index 6f73b1d56d..abfdc29832 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -15,6 +15,7 @@ import heapq from itertools import islice from typing import ( + Collection, Dict, Generator, Iterable, @@ -26,8 +27,6 @@ from typing import ( TypeVar, ) -from synapse.types import Collection - T = TypeVar("T") -- cgit 1.5.1 From 3853a7edfcee1c00ba4df04b06821397e1155257 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Apr 2021 11:47:07 +0100 Subject: Only store data in caches, not "smart" objects (#9845) --- changelog.d/9845.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 161 +++++++++++++++------------ synapse/storage/databases/main/roommember.py | 161 +++++++++++++++------------ 3 files changed, 182 insertions(+), 141 deletions(-) create mode 100644 changelog.d/9845.misc (limited to 'synapse/storage/databases/main/roommember.py') diff --git a/changelog.d/9845.misc b/changelog.d/9845.misc new file mode 100644 index 0000000000..875dd6d131 --- /dev/null +++ b/changelog.d/9845.misc @@ -0,0 +1 @@ +Only store the raw data in the in-memory caches, rather than objects that include references to e.g. the data stores. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 50b470c310..350646f458 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -106,6 +106,10 @@ class BulkPushRuleEvaluator: self.store = hs.get_datastore() self.auth = hs.get_auth() + # Used by `RulesForRoom` to ensure only one thing mutates the cache at a + # time. Keyed off room_id. + self._rules_linearizer = Linearizer(name="rules_for_room") + self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", @@ -123,7 +127,16 @@ class BulkPushRuleEvaluator: dict of user_id -> push_rules """ room_id = event.room_id - rules_for_room = self._get_rules_for_room(room_id) + + rules_for_room_data = self._get_rules_for_room(room_id) + rules_for_room = RulesForRoom( + hs=self.hs, + room_id=room_id, + rules_for_room_cache=self._get_rules_for_room.cache, + room_push_rule_cache_metrics=self.room_push_rule_cache_metrics, + linearizer=self._rules_linearizer, + cached_data=rules_for_room_data, + ) rules_by_user = await rules_for_room.get_rules(event, context) @@ -142,17 +155,12 @@ class BulkPushRuleEvaluator: return rules_by_user @lru_cache() - def _get_rules_for_room(self, room_id: str) -> "RulesForRoom": - """Get the current RulesForRoom object for the given room id""" - # It's important that RulesForRoom gets added to self._get_rules_for_room.cache + def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": + """Get the current RulesForRoomData object for the given room id""" + # It's important that the RulesForRoomData object gets added to self._get_rules_for_room.cache # before any lookup methods get called on it as otherwise there may be # a race if invalidate_all gets called (which assumes its in the cache) - return RulesForRoom( - self.hs, - room_id, - self._get_rules_for_room.cache, - self.room_push_rule_cache_metrics, - ) + return RulesForRoomData() async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext @@ -282,11 +290,49 @@ def _condition_checker( return True +@attr.s(slots=True) +class RulesForRoomData: + """The data stored in the cache by `RulesForRoom`. + + We don't store `RulesForRoom` directly in the cache as we want our caches to + *only* include data, and not references to e.g. the data stores. + """ + + # event_id -> (user_id, state) + member_map = attr.ib(type=Dict[str, Tuple[str, str]], factory=dict) + # user_id -> rules + rules_by_user = attr.ib(type=Dict[str, List[Dict[str, dict]]], factory=dict) + + # The last state group we updated the caches for. If the state_group of + # a new event comes along, we know that we can just return the cached + # result. + # On invalidation of the rules themselves (if the user changes them), + # we invalidate everything and set state_group to `object()` + state_group = attr.ib(type=Union[object, int], factory=object) + + # A sequence number to keep track of when we're allowed to update the + # cache. We bump the sequence number when we invalidate the cache. If + # the sequence number changes while we're calculating stuff we should + # not update the cache with it. + sequence = attr.ib(type=int, default=0) + + # A cache of user_ids that we *know* aren't interesting, e.g. user_ids + # owned by AS's, or remote users, etc. (I.e. users we will never need to + # calculate push for) + # These never need to be invalidated as we will never set up push for + # them. + uninteresting_user_set = attr.ib(type=Set[str], factory=set) + + class RulesForRoom: """Caches push rules for users in a room. This efficiently handles users joining/leaving the room by not invalidating the entire cache for the room. + + A new instance is constructed for each call to + `BulkPushRuleEvaluator._get_rules_for_event`, with the cached data from + previous calls passed in. """ def __init__( @@ -295,6 +341,8 @@ class RulesForRoom: room_id: str, rules_for_room_cache: LruCache, room_push_rule_cache_metrics: CacheMetric, + linearizer: Linearizer, + cached_data: RulesForRoomData, ): """ Args: @@ -303,38 +351,21 @@ class RulesForRoom: rules_for_room_cache: The cache object that caches these RoomsForUser objects. room_push_rule_cache_metrics: The metrics object + linearizer: The linearizer used to ensure only one thing mutates + the cache at a time. Keyed off room_id + cached_data: Cached data from previous calls to `self.get_rules`, + can be mutated. """ self.room_id = room_id self.is_mine_id = hs.is_mine_id self.store = hs.get_datastore() self.room_push_rule_cache_metrics = room_push_rule_cache_metrics - self.linearizer = Linearizer(name="rules_for_room") - - # event_id -> (user_id, state) - self.member_map = {} # type: Dict[str, Tuple[str, str]] - # user_id -> rules - self.rules_by_user = {} # type: Dict[str, List[Dict[str, dict]]] - - # The last state group we updated the caches for. If the state_group of - # a new event comes along, we know that we can just return the cached - # result. - # On invalidation of the rules themselves (if the user changes them), - # we invalidate everything and set state_group to `object()` - self.state_group = object() - - # A sequence number to keep track of when we're allowed to update the - # cache. We bump the sequence number when we invalidate the cache. If - # the sequence number changes while we're calculating stuff we should - # not update the cache with it. - self.sequence = 0 - - # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. (I.e. users we will never need to - # calculate push for) - # These never need to be invalidated as we will never set up push for - # them. - self.uninteresting_user_set = set() # type: Set[str] + # Used to ensure only one thing mutates the cache at a time. Keyed off + # room_id. + self.linearizer = linearizer + + self.data = cached_data # We need to be clever on the invalidating caches callbacks, as # otherwise the invalidation callback holds a reference to the object, @@ -352,25 +383,25 @@ class RulesForRoom: """ state_group = context.state_group - if state_group and self.state_group == state_group: + if state_group and self.data.state_group == state_group: logger.debug("Using cached rules for %r", self.room_id) self.room_push_rule_cache_metrics.inc_hits() - return self.rules_by_user + return self.data.rules_by_user - with (await self.linearizer.queue(())): - if state_group and self.state_group == state_group: + with (await self.linearizer.queue(self.room_id)): + if state_group and self.data.state_group == state_group: logger.debug("Using cached rules for %r", self.room_id) self.room_push_rule_cache_metrics.inc_hits() - return self.rules_by_user + return self.data.rules_by_user self.room_push_rule_cache_metrics.inc_misses() ret_rules_by_user = {} missing_member_event_ids = {} - if state_group and self.state_group == context.prev_group: + if state_group and self.data.state_group == context.prev_group: # If we have a simple delta then we can reuse most of the previous # results. - ret_rules_by_user = self.rules_by_user + ret_rules_by_user = self.data.rules_by_user current_state_ids = context.delta_ids push_rules_delta_state_cache_metric.inc_hits() @@ -393,24 +424,24 @@ class RulesForRoom: if typ != EventTypes.Member: continue - if user_id in self.uninteresting_user_set: + if user_id in self.data.uninteresting_user_set: continue if not self.is_mine_id(user_id): - self.uninteresting_user_set.add(user_id) + self.data.uninteresting_user_set.add(user_id) continue if self.store.get_if_app_services_interested_in_user(user_id): - self.uninteresting_user_set.add(user_id) + self.data.uninteresting_user_set.add(user_id) continue event_id = current_state_ids[key] - res = self.member_map.get(event_id, None) + res = self.data.member_map.get(event_id, None) if res: user_id, state = res if state == Membership.JOIN: - rules = self.rules_by_user.get(user_id, None) + rules = self.data.rules_by_user.get(user_id, None) if rules: ret_rules_by_user[user_id] = rules continue @@ -430,7 +461,7 @@ class RulesForRoom: else: # The push rules didn't change but lets update the cache anyway self.update_cache( - self.sequence, + self.data.sequence, members={}, # There were no membership changes rules_by_user=ret_rules_by_user, state_group=state_group, @@ -461,7 +492,7 @@ class RulesForRoom: for. Used when updating the cache. event: The event we are currently computing push rules for. """ - sequence = self.sequence + sequence = self.data.sequence rows = await self.store.get_membership_from_event_ids(member_event_ids.values()) @@ -501,23 +532,11 @@ class RulesForRoom: self.update_cache(sequence, members, ret_rules_by_user, state_group) - def invalidate_all(self) -> None: - # Note: Don't hand this function directly to an invalidation callback - # as it keeps a reference to self and will stop this instance from being - # GC'd if it gets dropped from the rules_to_user cache. Instead use - # `self.invalidate_all_cb` - logger.debug("Invalidating RulesForRoom for %r", self.room_id) - self.sequence += 1 - self.state_group = object() - self.member_map = {} - self.rules_by_user = {} - push_rules_invalidation_counter.inc() - def update_cache(self, sequence, members, rules_by_user, state_group) -> None: - if sequence == self.sequence: - self.member_map.update(members) - self.rules_by_user = rules_by_user - self.state_group = state_group + if sequence == self.data.sequence: + self.data.member_map.update(members) + self.data.rules_by_user = rules_by_user + self.data.state_group = state_group @attr.attrs(slots=True, frozen=True) @@ -535,6 +554,10 @@ class _Invalidation: room_id = attr.ib(type=str) def __call__(self) -> None: - rules = self.cache.get(self.room_id, None, update_metrics=False) - if rules: - rules.invalidate_all() + rules_data = self.cache.get(self.room_id, None, update_metrics=False) + if rules_data: + rules_data.sequence += 1 + rules_data.state_group = object() + rules_data.member_map = {} + rules_data.rules_by_user = {} + push_rules_invalidation_counter.inc() diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index bd8513cd43..2a8532f8c1 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -23,8 +23,11 @@ from typing import ( Optional, Set, Tuple, + Union, ) +import attr + from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext @@ -43,7 +46,7 @@ from synapse.storage.roommember import ( ProfileInfo, RoomsForUser, ) -from synapse.types import PersistedEventPosition, get_domain_from_id +from synapse.types import PersistedEventPosition, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList @@ -63,6 +66,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + # Used by `_get_joined_hosts` to ensure only one thing mutates the cache + # at a time. Keyed by room_id. + self._joined_host_linearizer = Linearizer("_JoinedHostsCache") + # Is the current_state_events.membership up to date? Or is the # background update still running? self._current_state_events_membership_up_to_date = False @@ -740,19 +747,82 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(num_args=2, max_entries=10000, iterable=True) async def _get_joined_hosts( - self, room_id, state_group, current_state_ids, state_entry - ): - # We don't use `state_group`, its there so that we can cache based - # on it. However, its important that its never None, since two current_state's - # with a state_group of None are likely to be different. + self, + room_id: str, + state_group: int, + current_state_ids: StateMap[str], + state_entry: "_StateCacheEntry", + ) -> FrozenSet[str]: + # We don't use `state_group`, its there so that we can cache based on + # it. However, its important that its never None, since two + # current_state's with a state_group of None are likely to be different. + # + # The `state_group` must match the `state_entry.state_group` (if not None). assert state_group is not None - + assert state_entry.state_group is None or state_entry.state_group == state_group + + # We use a secondary cache of previous work to allow us to build up the + # joined hosts for the given state group based on previous state groups. + # + # We cache one object per room containing the results of the last state + # group we got joined hosts for. The idea is that generally + # `get_joined_hosts` is called with the "current" state group for the + # room, and so consecutive calls will be for consecutive state groups + # which point to the previous state group. cache = await self._get_joined_hosts_cache(room_id) - return await cache.get_destinations(state_entry) + + # If the state group in the cache matches, we already have the data we need. + if state_entry.state_group == cache.state_group: + return frozenset(cache.hosts_to_joined_users) + + # Since we'll mutate the cache we need to lock. + with (await self._joined_host_linearizer.queue(room_id)): + if state_entry.state_group == cache.state_group: + # Same state group, so nothing to do. We've already checked for + # this above, but the cache may have changed while waiting on + # the lock. + pass + elif state_entry.prev_group == cache.state_group: + # The cached work is for the previous state group, so we work out + # the delta. + for (typ, state_key), event_id in state_entry.delta_ids.items(): + if typ != EventTypes.Member: + continue + + host = intern_string(get_domain_from_id(state_key)) + user_id = state_key + known_joins = cache.hosts_to_joined_users.setdefault(host, set()) + + event = await self.get_event(event_id) + if event.membership == Membership.JOIN: + known_joins.add(user_id) + else: + known_joins.discard(user_id) + + if not known_joins: + cache.hosts_to_joined_users.pop(host, None) + else: + # The cache doesn't match the state group or prev state group, + # so we calculate the result from first principles. + joined_users = await self.get_joined_users_from_state( + room_id, state_entry + ) + + cache.hosts_to_joined_users = {} + for user_id in joined_users: + host = intern_string(get_domain_from_id(user_id)) + cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) + + if state_entry.state_group: + cache.state_group = state_entry.state_group + else: + cache.state_group = object() + + return frozenset(cache.hosts_to_joined_users) @cached(max_entries=10000) def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache": - return _JoinedHostsCache(self, room_id) + return _JoinedHostsCache() @cached(num_args=2) async def did_forget(self, user_id: str, room_id: str) -> bool: @@ -1062,71 +1132,18 @@ class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): await self.db_pool.runInteraction("forget_membership", f) +@attr.s(slots=True) class _JoinedHostsCache: - """Cache for joined hosts in a room that is optimised to handle updates - via state deltas. - """ - - def __init__(self, store, room_id): - self.store = store - self.room_id = room_id + """The cached data used by the `_get_joined_hosts_cache`.""" - self.hosts_to_joined_users = {} + # Dict of host to the set of their users in the room at the state group. + hosts_to_joined_users = attr.ib(type=Dict[str, Set[str]], factory=dict) - self.state_group = object() - - self.linearizer = Linearizer("_JoinedHostsCache") - - self._len = 0 - - async def get_destinations(self, state_entry: "_StateCacheEntry") -> Set[str]: - """Get set of destinations for a state entry - - Args: - state_entry - - Returns: - The destinations as a set. - """ - if state_entry.state_group == self.state_group: - return frozenset(self.hosts_to_joined_users) - - with (await self.linearizer.queue(())): - if state_entry.state_group == self.state_group: - pass - elif state_entry.prev_group == self.state_group: - for (typ, state_key), event_id in state_entry.delta_ids.items(): - if typ != EventTypes.Member: - continue - - host = intern_string(get_domain_from_id(state_key)) - user_id = state_key - known_joins = self.hosts_to_joined_users.setdefault(host, set()) - - event = await self.store.get_event(event_id) - if event.membership == Membership.JOIN: - known_joins.add(user_id) - else: - known_joins.discard(user_id) - - if not known_joins: - self.hosts_to_joined_users.pop(host, None) - else: - joined_users = await self.store.get_joined_users_from_state( - self.room_id, state_entry - ) - - self.hosts_to_joined_users = {} - for user_id in joined_users: - host = intern_string(get_domain_from_id(user_id)) - self.hosts_to_joined_users.setdefault(host, set()).add(user_id) - - if state_entry.state_group: - self.state_group = state_entry.state_group - else: - self.state_group = object() - self._len = sum(len(v) for v in self.hosts_to_joined_users.values()) - return frozenset(self.hosts_to_joined_users) + # The state group `hosts_to_joined_users` is derived from. Will be an object + # if the instance is newly created or if the state is not based on a state + # group. (An object is used as a sentinel value to ensure that it never is + # equal to anything else). + state_group = attr.ib(type=Union[object, int], factory=object) def __len__(self): - return self._len + return sum(len(v) for v in self.hosts_to_joined_users.values()) -- cgit 1.5.1