From 637282bb5019ce1656001927eea1be46c4854815 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 30 Dec 2020 08:09:53 -0500
Subject: Add additional type hints to the storage module. (#8980)

---
 synapse/storage/prepare_database.py | 104 +++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 48 deletions(-)

diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 459754feab..f91a2eae7a 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -18,9 +18,10 @@ import logging
 import os
 import re
 from collections import Counter
-from typing import Optional, TextIO
+from typing import Generator, Iterable, List, Optional, TextIO, Tuple
 
 import attr
+from typing_extensions import Counter as CounterType
 
 from synapse.config.homeserver import HomeServerConfig
 from synapse.storage.database import LoggingDatabaseConnection
@@ -70,7 +71,7 @@ def prepare_database(
     db_conn: LoggingDatabaseConnection,
     database_engine: BaseDatabaseEngine,
     config: Optional[HomeServerConfig],
-    databases: Collection[str] = ["main", "state"],
+    databases: Collection[str] = ("main", "state"),
 ):
     """Prepares a physical database for usage. Will either create all necessary
     tables or upgrade from an older schema version.
@@ -155,7 +156,9 @@ def prepare_database(
             raise
 
 
-def _setup_new_database(cur, database_engine, databases):
+def _setup_new_database(
+    cur: Cursor, database_engine: BaseDatabaseEngine, databases: Collection[str]
+) -> None:
     """Sets up the physical database by finding a base set of "full schemas" and
     then applying any necessary deltas, including schemas from the given data
     stores.
@@ -188,10 +191,9 @@ def _setup_new_database(cur, database_engine, databases):
     folder as well those in the data stores specified.
 
     Args:
-        cur (Cursor): a database cursor
-        database_engine (DatabaseEngine)
-        databases (list[str]): The names of the databases to instantiate
-            on the given physical database.
+        cur: a database cursor
+        database_engine
+        databases: The names of the databases to instantiate on the given physical database.
     """
 
     # We're about to set up a brand new database so we check that its
@@ -199,12 +201,11 @@ def _setup_new_database(cur, database_engine, databases):
     database_engine.check_new_database(cur)
 
     current_dir = os.path.join(dir_path, "schema", "full_schemas")
-    directory_entries = os.listdir(current_dir)
 
     # First we find the highest full schema version we have
     valid_versions = []
 
-    for filename in directory_entries:
+    for filename in os.listdir(current_dir):
         try:
             ver = int(filename)
         except ValueError:
@@ -237,7 +238,7 @@ def _setup_new_database(cur, database_engine, databases):
         for database in databases
     )
 
-    directory_entries = []
+    directory_entries = []  # type: List[_DirectoryListing]
     for directory in directories:
         directory_entries.extend(
             _DirectoryListing(file_name, os.path.join(directory, file_name))
@@ -275,15 +276,15 @@ def _setup_new_database(cur, database_engine, databases):
 
 
 def _upgrade_existing_database(
-    cur,
-    current_version,
-    applied_delta_files,
-    upgraded,
-    database_engine,
-    config,
-    databases,
-    is_empty=False,
-):
+    cur: Cursor,
+    current_version: int,
+    applied_delta_files: List[str],
+    upgraded: bool,
+    database_engine: BaseDatabaseEngine,
+    config: Optional[HomeServerConfig],
+    databases: Collection[str],
+    is_empty: bool = False,
+) -> None:
     """Upgrades an existing physical database.
 
     Delta files can either be SQL stored in *.sql files, or python modules
@@ -323,21 +324,20 @@ def _upgrade_existing_database(
         for a version before applying those in the next version.
 
     Args:
-        cur (Cursor)
-        current_version (int): The current version of the schema.
-        applied_delta_files (list): A list of deltas that have already been
-            applied.
-        upgraded (bool): Whether the current version was generated by having
+        cur
+        current_version: The current version of the schema.
+        applied_delta_files: A list of deltas that have already been applied.
+        upgraded: Whether the current version was generated by having
            applied deltas or from full schema file. If `True` the function
            will never apply delta files for the given `current_version`, since
            the current_version wasn't generated by applying those delta files.
-        database_engine (DatabaseEngine)
-        config (synapse.config.homeserver.HomeServerConfig|None):
+        database_engine
+        config:
            None if we are initialising a blank database, otherwise the application
            config
-        databases (list[str]): The names of the databases to instantiate
+        databases: The names of the databases to instantiate
            on the given physical database.
-        is_empty (bool): Is this a blank database? I.e. do we need to run the
+        is_empty: Is this a blank database? I.e. do we need to run the
            upgrade portions of the delta scripts.
     """
     if is_empty:
@@ -358,6 +358,7 @@ def _upgrade_existing_database(
     if not is_empty and "main" in databases:
         from synapse.storage.databases.main import check_database_before_upgrade
 
+        assert config is not None
         check_database_before_upgrade(cur, database_engine, config)
 
     start_ver = current_version
@@ -388,10 +389,10 @@ def _upgrade_existing_database(
     )
 
     # Used to check if we have any duplicate file names
-    file_name_counter = Counter()
+    file_name_counter = Counter()  # type: CounterType[str]
 
     # Now find which directories have anything of interest.
-    directory_entries = []
+    directory_entries = []  # type: List[_DirectoryListing]
     for directory in directories:
         logger.debug("Looking for schema deltas in %s", directory)
         try:
@@ -445,11 +446,11 @@ def _upgrade_existing_database(
                 module_name = "synapse.storage.v%d_%s" % (v, root_name)
                 with open(absolute_path) as python_file:
-                    module = imp.load_source(module_name, absolute_path, python_file)
+                    module = imp.load_source(module_name, absolute_path, python_file)  # type: ignore
                 logger.info("Running script %s", relative_path)
-                module.run_create(cur, database_engine)
+                module.run_create(cur, database_engine)  # type: ignore
                 if not is_empty:
-                    module.run_upgrade(cur, database_engine, config=config)
+                    module.run_upgrade(cur, database_engine, config=config)  # type: ignore
             elif ext == ".pyc" or file_name == "__pycache__":
                 # Sometimes .pyc files turn up anyway even though we've
                # disabled their generation; e.g. from distribution package
@@ -497,14 +498,15 @@ def _upgrade_existing_database(
     logger.info("Schema now up to date")
 
 
-def _apply_module_schemas(txn, database_engine, config):
+def _apply_module_schemas(
+    txn: Cursor, database_engine: BaseDatabaseEngine, config: HomeServerConfig
+) -> None:
     """Apply the module schemas for the dynamic modules, if any
 
     Args:
         cur: database cursor
-        database_engine: synapse database engine class
-        config (synapse.config.homeserver.HomeServerConfig):
-            application config
+        database_engine:
+        config: application config
     """
     for (mod, _config) in config.password_providers:
         if not hasattr(mod, "get_db_schema_files"):
@@ -515,15 +517,19 @@ def _apply_module_schemas(txn, database_engine, config):
     )
 
 
-def _apply_module_schema_files(cur, database_engine, modname, names_and_streams):
+def _apply_module_schema_files(
+    cur: Cursor,
+    database_engine: BaseDatabaseEngine,
+    modname: str,
+    names_and_streams: Iterable[Tuple[str, TextIO]],
+) -> None:
     """Apply the module schemas for a single module
 
     Args:
         cur: database cursor
         database_engine: synapse database engine class
-        modname (str): fully qualified name of the module
-        names_and_streams (Iterable[(str, file)]): the names and streams of
-            schemas to be applied
+        modname: fully qualified name of the module
+        names_and_streams: the names and streams of schemas to be applied
     """
     cur.execute(
         "SELECT file FROM applied_module_schemas WHERE module_name = ?", (modname,),
     )
@@ -549,7 +555,7 @@ def _apply_module_schema_files(cur, database_engine, modname, names_and_streams)
     )
 
 
-def get_statements(f):
+def get_statements(f: Iterable[str]) -> Generator[str, None, None]:
     statement_buffer = ""
     in_comment = False  # If we're in a /* ... */ style comment
@@ -594,17 +600,19 @@ def get_statements(f):
         statement_buffer = statements[-1].strip()
 
 
-def executescript(txn, schema_path):
+def executescript(txn: Cursor, schema_path: str) -> None:
     with open(schema_path, "r") as f:
         execute_statements_from_stream(txn, f)
 
 
-def execute_statements_from_stream(cur: Cursor, f: TextIO):
+def execute_statements_from_stream(cur: Cursor, f: TextIO) -> None:
     for statement in get_statements(f):
         cur.execute(statement)
 
 
-def _get_or_create_schema_state(txn, database_engine):
+def _get_or_create_schema_state(
+    txn: Cursor, database_engine: BaseDatabaseEngine
+) -> Optional[Tuple[int, List[str], bool]]:
     # Bluntly try creating the schema_version tables.
     schema_path = os.path.join(dir_path, "schema", "schema_version.sql")
     executescript(txn, schema_path)
@@ -612,7 +620,6 @@ def _get_or_create_schema_state(txn, database_engine):
     txn.execute("SELECT version, upgraded FROM schema_version")
     row = txn.fetchone()
     current_version = int(row[0]) if row else None
-    upgraded = bool(row[1]) if row else None
 
     if current_version:
         txn.execute(
@@ -620,6 +627,7 @@ def _get_or_create_schema_state(txn, database_engine):
             (current_version,),
         )
         applied_deltas = [d for d, in txn]
+        upgraded = bool(row[1])
         return current_version, applied_deltas, upgraded
 
     return None
@@ -634,5 +642,5 @@ class _DirectoryListing:
     `file_name` attr is kept first.
     """
 
-    file_name = attr.ib()
-    absolute_path = attr.ib()
+    file_name = attr.ib(type=str)
+    absolute_path = attr.ib(type=str)
--
cgit 1.5.1
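A note on the typing style in the patch above: at this point the codebase still annotated local variables with comment-style annotations (`# type: ...`), and it pulls the generic `Counter` alias from `typing_extensions` rather than `typing`, since the subscriptable `typing.Counter` is not available on some of the older interpreter versions Synapse still supported. A minimal, self-contained sketch of that pattern (illustrative only, not taken from the patch):

    from collections import Counter
    from typing import List

    from typing_extensions import Counter as CounterType

    # Runtime code keeps using collections.Counter; the comment-style
    # annotation names the generic alias so a type checker knows the
    # element type without changing runtime behaviour.
    file_name_counter = Counter()  # type: CounterType[str]
    delta_names = []  # type: List[str]

    file_name_counter.update(["01delta.sql", "01delta.sql", "02delta.sql"])
    duplicates = [name for name, count in file_name_counter.items() if count > 1]
    assert duplicates == ["01delta.sql"]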
From eee3c3c52f59661a95f2f626edc97fac295817d6 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 7 Jan 2021 11:33:36 +0000
Subject: Handle updating schema version without any deltas. (#9033)

This can happen when using a split out state database and we've upgraded
the schema version without there being any changes in the state schema.
---
 changelog.d/9033.misc               |  1 +
 synapse/storage/prepare_database.py | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/9033.misc

diff --git a/changelog.d/9033.misc b/changelog.d/9033.misc
new file mode 100644
index 0000000000..e9a305c0e8
--- /dev/null
+++ b/changelog.d/9033.misc
@@ -0,0 +1 @@
+Allow bumping schema version when using split out state database.

diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index f91a2eae7a..6684403a0a 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -375,7 +375,16 @@ def _upgrade_existing_database(
     specific_engine_extensions = (".sqlite", ".postgres")
 
     for v in range(start_ver, SCHEMA_VERSION + 1):
-        logger.info("Applying schema deltas for v%d", v)
+        if not is_worker:
+            logger.info("Applying schema deltas for v%d", v)
+
+            cur.execute("DELETE FROM schema_version")
+            cur.execute(
+                "INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
+                (v, True),
+            )
+        else:
+            logger.info("Checking schema deltas for v%d", v)
 
         # We need to search both the global and per data store schema
         # directories for schema updates.
@@ -489,12 +498,6 @@ def _upgrade_existing_database(
                 (v, relative_path),
             )
 
-        cur.execute("DELETE FROM schema_version")
-        cur.execute(
-            "INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
-            (v, True),
-        )
-
     logger.info("Schema now up to date")
--
cgit 1.5.1
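The behavioural change in the patch above is easiest to see in isolation: the `schema_version` row is now written at the top of each iteration of the per-version loop, so a version that ships no delta files still advances the recorded schema version. A simplified sketch of that control flow, with hypothetical `find_deltas`/`apply_delta` stand-ins (they are not the real Synapse helpers):

    from typing import List


    def find_deltas(version: int) -> List[str]:
        """Hypothetical stand-in for the real delta-file discovery."""
        return []


    def apply_delta(cur, delta: str) -> None:
        """Hypothetical stand-in for executing a single delta file."""


    def upgrade(cur, start_ver: int, target_ver: int, is_worker: bool) -> None:
        for v in range(start_ver, target_ver + 1):
            if not is_worker:
                # Record the version before looking for deltas: an iteration
                # that finds no delta files still bumps schema_version.
                cur.execute("DELETE FROM schema_version")
                cur.execute(
                    "INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
                    (v, True),
                )
            for delta in find_deltas(v):  # may legitimately be an empty list
                apply_delta(cur, delta)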
From 23d701864fedbc40863b34c9c46c134295dd0a35 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Thu, 7 Jan 2021 08:03:38 -0500
Subject: Improve the performance of calculating ignored users in large rooms (#9024)

This allows for efficiently finding which users ignore a particular user.

Co-authored-by: Erik Johnston
---
 changelog.d/9024.feature                       |   1 +
 synapse/push/bulk_push_rule_evaluator.py       |  12 +-
 synapse/storage/databases/main/account_data.py | 121 ++++++++++++++++-----
 .../main/schema/delta/59/01ignored_user.py     |  82 ++++++++++++++
 synapse/storage/prepare_database.py            |   2 +-
 tests/storage/test_account_data.py             | 120 ++++++++++++++++++++
 6 files changed, 304 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/9024.feature
 create mode 100644 synapse/storage/databases/main/schema/delta/59/01ignored_user.py
 create mode 100644 tests/storage/test_account_data.py

diff --git a/changelog.d/9024.feature b/changelog.d/9024.feature
new file mode 100644
index 0000000000..073dafbf83
--- /dev/null
+++ b/changelog.d/9024.feature
@@ -0,0 +1 @@
+Improved performance when calculating ignored users in large rooms.

diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 10f27e4378..9018f9e20b 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -203,14 +203,18 @@ class BulkPushRuleEvaluator:
 
         condition_cache = {}  # type: Dict[str, bool]
 
+        # If the event is not a state event check if any users ignore the sender.
+        if not event.is_state():
+            ignorers = await self.store.ignored_by(event.sender)
+        else:
+            ignorers = set()
+
         for uid, rules in rules_by_user.items():
             if event.sender == uid:
                 continue
 
-            if not event.is_state():
-                is_ignored = await self.store.is_ignored_by(event.sender, uid)
-                if is_ignored:
-                    continue
+            if uid in ignorers:
+                continue
 
             display_name = None
             profile_info = room_members.get(uid)

diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 49ee23470d..bff51e92b9 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -16,7 +16,7 @@
 
 import abc
 import logging
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Set, Tuple
 
 from synapse.api.constants import AccountDataTypes
 from synapse.storage._base import SQLBaseStore, db_to_json
@@ -24,7 +24,7 @@ from synapse.storage.database import DatabasePool
 from synapse.storage.util.id_generators import StreamIdGenerator
 from synapse.types import JsonDict
 from synapse.util import json_encoder
-from synapse.util.caches.descriptors import _CacheContext, cached
+from synapse.util.caches.descriptors import cached
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 logger = logging.getLogger(__name__)
@@ -287,23 +287,25 @@ class AccountDataWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
             "get_updated_account_data_for_user", get_updated_account_data_for_user_txn
         )
 
-    @cached(num_args=2, cache_context=True, max_entries=5000)
-    async def is_ignored_by(
-        self, ignored_user_id: str, ignorer_user_id: str, cache_context: _CacheContext
-    ) -> bool:
-        ignored_account_data = await self.get_global_account_data_by_type_for_user(
-            AccountDataTypes.IGNORED_USER_LIST,
-            ignorer_user_id,
-            on_invalidate=cache_context.invalidate,
-        )
-        if not ignored_account_data:
-            return False
+    @cached(max_entries=5000, iterable=True)
+    async def ignored_by(self, user_id: str) -> Set[str]:
+        """
+        Get users which ignore the given user.
 
-        try:
-            return ignored_user_id in ignored_account_data.get("ignored_users", {})
-        except TypeError:
-            # The type of the ignored_users field is invalid.
-            return False
+        Params:
+            user_id: The user ID which might be ignored.
+
+        Return:
+            The user IDs which ignore the given user.
+        """
+        return set(
+            await self.db_pool.simple_select_onecol(
+                table="ignored_users",
+                keyvalues={"ignored_user_id": user_id},
+                retcol="ignorer_user_id",
+                desc="ignored_by",
+            )
+        )
 
 
 class AccountDataStore(AccountDataWorkerStore):
@@ -390,18 +392,14 @@ class AccountDataStore(AccountDataWorkerStore):
         Returns:
             The maximum stream ID.
         """
-        content_json = json_encoder.encode(content)
-
         async with self._account_data_id_gen.get_next() as next_id:
-            # no need to lock here as account_data has a unique constraint on
-            # (user_id, account_data_type) so simple_upsert will retry if
-            # there is a conflict.
-            await self.db_pool.simple_upsert(
-                desc="add_user_account_data",
-                table="account_data",
-                keyvalues={"user_id": user_id, "account_data_type": account_data_type},
-                values={"stream_id": next_id, "content": content_json},
-                lock=False,
+            await self.db_pool.runInteraction(
+                "add_user_account_data",
+                self._add_account_data_for_user,
+                next_id,
+                user_id,
+                account_data_type,
+                content,
             )
 
             # it's theoretically possible for the above to succeed and the
@@ -424,6 +422,71 @@ class AccountDataStore(AccountDataWorkerStore):
 
         return self._account_data_id_gen.get_current_token()
 
+    def _add_account_data_for_user(
+        self,
+        txn,
+        next_id: int,
+        user_id: str,
+        account_data_type: str,
+        content: JsonDict,
+    ) -> None:
+        content_json = json_encoder.encode(content)
+
+        # no need to lock here as account_data has a unique constraint on
+        # (user_id, account_data_type) so simple_upsert will retry if
+        # there is a conflict.
+        self.db_pool.simple_upsert_txn(
+            txn,
+            table="account_data",
+            keyvalues={"user_id": user_id, "account_data_type": account_data_type},
+            values={"stream_id": next_id, "content": content_json},
+            lock=False,
+        )
+
+        # Ignored users get denormalized into a separate table as an optimisation.
+        if account_data_type != AccountDataTypes.IGNORED_USER_LIST:
+            return
+
+        # Insert / delete to sync the list of ignored users.
+        previously_ignored_users = set(
+            self.db_pool.simple_select_onecol_txn(
+                txn,
+                table="ignored_users",
+                keyvalues={"ignorer_user_id": user_id},
+                retcol="ignored_user_id",
+            )
+        )
+
+        # If the data is invalid, no one is ignored.
+        ignored_users_content = content.get("ignored_users", {})
+        if isinstance(ignored_users_content, dict):
+            currently_ignored_users = set(ignored_users_content)
+        else:
+            currently_ignored_users = set()
+
+        # Delete entries which are no longer ignored.
+        self.db_pool.simple_delete_many_txn(
+            txn,
+            table="ignored_users",
+            column="ignored_user_id",
+            iterable=previously_ignored_users - currently_ignored_users,
+            keyvalues={"ignorer_user_id": user_id},
+        )
+
+        # Add entries which are newly ignored.
+        self.db_pool.simple_insert_many_txn(
+            txn,
+            table="ignored_users",
+            values=[
+                {"ignorer_user_id": user_id, "ignored_user_id": u}
+                for u in currently_ignored_users - previously_ignored_users
+            ],
+        )
+
+        # Invalidate the cache for any ignored users which were added or removed.
+        for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
+            self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
+
     async def _update_max_stream_id(self, next_id: int) -> None:
         """Update the max stream_id

diff --git a/synapse/storage/databases/main/schema/delta/59/01ignored_user.py b/synapse/storage/databases/main/schema/delta/59/01ignored_user.py
new file mode 100644
index 0000000000..f35c70b699
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/59/01ignored_user.py
@@ -0,0 +1,82 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This migration denormalises the account_data table into an ignored users table.
+"""
+
+import logging
+from io import StringIO
+
+from synapse.storage._base import db_to_json
+from synapse.storage.engines import BaseDatabaseEngine
+from synapse.storage.prepare_database import execute_statements_from_stream
+from synapse.storage.types import Cursor
+
+logger = logging.getLogger(__name__)
+
+
+def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    pass
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    logger.info("Creating ignored_users table")
+    execute_statements_from_stream(cur, StringIO(_create_commands))
+
+    # We now upgrade existing data, if any. We don't do this in `run_upgrade` as
+    # we a) want to run these before adding constraints and b) `run_upgrade` is
+    # not run on empty databases.
+    insert_sql = """
+    INSERT INTO ignored_users (ignorer_user_id, ignored_user_id) VALUES (?, ?)
+    """
+
+    logger.info("Converting existing ignore lists")
+    cur.execute(
+        "SELECT user_id, content FROM account_data WHERE account_data_type = 'm.ignored_user_list'"
+    )
+    for user_id, content_json in cur.fetchall():
+        content = db_to_json(content_json)
+
+        # The content should be the form of a dictionary with a key
+        # "ignored_users" pointing to a dictionary with keys of ignored users.
+        #
+        # {"ignored_users": {"@someone:example.org": {}}}
+        ignored_users = content.get("ignored_users", {})
+        if isinstance(ignored_users, dict) and ignored_users:
+            cur.executemany(insert_sql, [(user_id, u) for u in ignored_users])
+
+    # Add indexes after inserting data for efficiency.
+    logger.info("Adding constraints to ignored_users table")
+    execute_statements_from_stream(cur, StringIO(_constraints_commands))
+
+
+# there might be duplicates, so the easiest way to achieve this is to create a new
+# table with the right data, and renaming it into place
+
+_create_commands = """
+-- Users which are ignored when calculating push notifications. This data is
+-- denormalized from account data.
+CREATE TABLE IF NOT EXISTS ignored_users(
+    ignorer_user_id TEXT NOT NULL,  -- The user ID of the user who is ignoring another user. (This is a local user.)
+    ignored_user_id TEXT NOT NULL  -- The user ID of the user who is being ignored. (This is a local or remote user.)
+);
+"""
+
+_constraints_commands = """
+CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id);
+
+-- Add an index on ignored_users since look-ups are done to get all ignorers of an ignored user.
+CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id);
+"""

diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 6684403a0a..01efb2cabb 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
 # XXX: If you're about to bump this to 59 (or higher) please create an update
 # that drops the unused `cache_invalidation_stream` table, as per #7436!
 # XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
-SCHEMA_VERSION = 58
+SCHEMA_VERSION = 59
 
 dir_path = os.path.abspath(os.path.dirname(__file__))

diff --git a/tests/storage/test_account_data.py b/tests/storage/test_account_data.py
new file mode 100644
index 0000000000..673e1fe3e3
--- /dev/null
+++ b/tests/storage/test_account_data.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Iterable, Set
+
+from synapse.api.constants import AccountDataTypes
+
+from tests import unittest
+
+
+class IgnoredUsersTestCase(unittest.HomeserverTestCase):
+    def prepare(self, hs, reactor, clock):
+        self.store = self.hs.get_datastore()
+        self.user = "@user:test"
+
+    def _update_ignore_list(
+        self, *ignored_user_ids: Iterable[str], ignorer_user_id: str = None
+    ) -> None:
+        """Update the account data to block the given users."""
+        if ignorer_user_id is None:
+            ignorer_user_id = self.user
+
+        self.get_success(
+            self.store.add_account_data_for_user(
+                ignorer_user_id,
+                AccountDataTypes.IGNORED_USER_LIST,
+                {"ignored_users": {u: {} for u in ignored_user_ids}},
+            )
+        )
+
+    def assert_ignorers(
+        self, ignored_user_id: str, expected_ignorer_user_ids: Set[str]
+    ) -> None:
+        self.assertEqual(
+            self.get_success(self.store.ignored_by(ignored_user_id)),
+            expected_ignorer_user_ids,
+        )
+
+    def test_ignoring_users(self):
+        """Basic adding/removing of users from the ignore list."""
+        self._update_ignore_list("@other:test", "@another:remote")
+
+        # Check a user which no one ignores.
+        self.assert_ignorers("@user:test", set())
+
+        # Check a local user which is ignored.
+        self.assert_ignorers("@other:test", {self.user})
+
+        # Check a remote user which is ignored.
+        self.assert_ignorers("@another:remote", {self.user})
+
+        # Add one user, remove one user, and leave one user.
+        self._update_ignore_list("@foo:test", "@another:remote")
+
+        # Check the removed user.
+        self.assert_ignorers("@other:test", set())
+
+        # Check the added user.
+        self.assert_ignorers("@foo:test", {self.user})
+
+        # Check the removed user.
+        self.assert_ignorers("@another:remote", {self.user})
+
+    def test_caching(self):
+        """Ensure that caching works properly between different users."""
+        # The first user ignores a user.
+        self._update_ignore_list("@other:test")
+        self.assert_ignorers("@other:test", {self.user})
+
+        # The second user ignores them.
+        self._update_ignore_list("@other:test", ignorer_user_id="@second:test")
+        self.assert_ignorers("@other:test", {self.user, "@second:test"})
+
+        # The first user un-ignores them.
+        self._update_ignore_list()
+        self.assert_ignorers("@other:test", {"@second:test"})
+
+    def test_invalid_data(self):
+        """Invalid data ends up clearing out the ignored users list."""
+        # Add some data and ensure it is there.
+        self._update_ignore_list("@other:test")
+        self.assert_ignorers("@other:test", {self.user})
+
+        # No ignored_users key.
+        self.get_success(
+            self.store.add_account_data_for_user(
+                self.user, AccountDataTypes.IGNORED_USER_LIST, {},
+            )
+        )
+
+        # No one ignores the user now.
+        self.assert_ignorers("@other:test", set())
+
+        # Add some data and ensure it is there.
+        self._update_ignore_list("@other:test")
+        self.assert_ignorers("@other:test", {self.user})
+
+        # Invalid data.
+        self.get_success(
+            self.store.add_account_data_for_user(
+                self.user,
+                AccountDataTypes.IGNORED_USER_LIST,
+                {"ignored_users": "unexpected"},
+            )
+        )
+
+        # No one ignores the user now.
+        self.assert_ignorers("@other:test", set())
--
cgit 1.5.1
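The core of the denormalisation added above is a set reconciliation inside `_add_account_data_for_user`: the rows already in `ignored_users` are compared against the incoming `m.ignored_user_list` content, deletions and insertions are derived from the two set differences, and the `ignored_by` cache is invalidated for the symmetric difference. A self-contained sketch of that arithmetic (the `reconcile` helper is hypothetical, written here only to make the logic concrete):

    from typing import Dict, Set


    def reconcile(previous: Set[str], content: Dict) -> Dict[str, Set[str]]:
        ignored = content.get("ignored_users", {})
        # Invalid data (e.g. a string instead of a dict) means no one is ignored.
        current = set(ignored) if isinstance(ignored, dict) else set()
        return {
            "to_delete": previous - current,  # rows to remove from ignored_users
            "to_insert": current - previous,  # rows to add to ignored_users
            "to_invalidate": previous ^ current,  # cache entries to drop
        }


    changes = reconcile({"@a:test"}, {"ignored_users": {"@b:test": {}}})
    assert changes["to_delete"] == {"@a:test"}
    assert changes["to_insert"] == {"@b:test"}
    assert changes["to_invalidate"] == {"@a:test", "@b:test"}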
From 4e04435bda135d3441777a51aa54dbd4c3925f2b Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 11 Jan 2021 13:58:19 +0000
Subject: Remove old tables after schema version bump (#9055)

These tables are unused, and can be dropped now the schema version has
been bumped.
---
 changelog.d/9055.misc                              |  1 +
 synapse/storage/databases/main/account_data.py     | 48 +---------------------
 .../main/schema/delta/59/04drop_account_data.sql   | 17 ++++++++
 .../main/schema/delta/59/05cache_invalidation.sql  | 17 ++++++++
 synapse/storage/databases/main/tags.py             | 10 -----
 synapse/storage/prepare_database.py                |  3 --
 6 files changed, 37 insertions(+), 59 deletions(-)
 create mode 100644 changelog.d/9055.misc
 create mode 100644 synapse/storage/databases/main/schema/delta/59/04drop_account_data.sql
 create mode 100644 synapse/storage/databases/main/schema/delta/59/05cache_invalidation.sql

diff --git a/changelog.d/9055.misc b/changelog.d/9055.misc
new file mode 100644
index 0000000000..8e0512eb1e
--- /dev/null
+++ b/changelog.d/9055.misc
@@ -0,0 +1 @@
+Drop unused database tables.

diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index bff51e92b9..bad8260892 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -312,12 +312,9 @@ class AccountDataStore(AccountDataWorkerStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         self._account_data_id_gen = StreamIdGenerator(
             db_conn,
-            "account_data_max_stream_id",
+            "room_account_data",
             "stream_id",
-            extra_tables=[
-                ("room_account_data", "stream_id"),
-                ("room_tags_revisions", "stream_id"),
-            ],
+            extra_tables=[("room_tags_revisions", "stream_id")],
         )
 
         super().__init__(database, db_conn, hs)
@@ -362,14 +359,6 @@ class AccountDataStore(AccountDataWorkerStore):
             lock=False,
         )
 
-        # it's theoretically possible for the above to succeed and the
-        # below to fail - in which case we might reuse a stream id on
-        # restart, and the above update might not get propagated. That
-        # doesn't sound any worse than the whole update getting lost,
-        # which is what would happen if we combined the two into one
-        # transaction.
-        await self._update_max_stream_id(next_id)
-
         self._account_data_stream_cache.entity_has_changed(user_id, next_id)
         self.get_account_data_for_user.invalidate((user_id,))
         self.get_account_data_for_room.invalidate((user_id, room_id))
@@ -402,18 +391,6 @@ class AccountDataStore(AccountDataWorkerStore):
             content,
         )
 
-        # it's theoretically possible for the above to succeed and the
-        # below to fail - in which case we might reuse a stream id on
-        # restart, and the above update might not get propagated. That
-        # doesn't sound any worse than the whole update getting lost,
-        # which is what would happen if we combined the two into one
-        # transaction.
-        #
-        # Note: This is only here for backwards compat to allow admins to
-        # roll back to a previous Synapse version. Next time we update the
-        # database version we can remove this table.
-        await self._update_max_stream_id(next_id)
-
         self._account_data_stream_cache.entity_has_changed(user_id, next_id)
         self.get_account_data_for_user.invalidate((user_id,))
         self.get_global_account_data_by_type_for_user.invalidate(
@@ -486,24 +463,3 @@ class AccountDataStore(AccountDataWorkerStore):
         # Invalidate the cache for any ignored users which were added or removed.
         for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
             self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
-
-    async def _update_max_stream_id(self, next_id: int) -> None:
-        """Update the max stream_id
-
-        Args:
-            next_id: The the revision to advance to.
-        """
-
-        # Note: This is only here for backwards compat to allow admins to
-        # roll back to a previous Synapse version. Next time we update the
-        # database version we can remove this table.
-
-        def _update(txn):
-            update_max_id_sql = (
-                "UPDATE account_data_max_stream_id"
-                " SET stream_id = ?"
-                " WHERE stream_id < ?"
-            )
-            txn.execute(update_max_id_sql, (next_id, next_id))
-
-        await self.db_pool.runInteraction("update_account_data_max_stream_id", _update)

diff --git a/synapse/storage/databases/main/schema/delta/59/04drop_account_data.sql b/synapse/storage/databases/main/schema/delta/59/04drop_account_data.sql
new file mode 100644
index 0000000000..64ab696cfe
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/59/04drop_account_data.sql
@@ -0,0 +1,17 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This is no longer used and was only kept until we bumped the schema version.
+DROP TABLE IF EXISTS account_data_max_stream_id;

diff --git a/synapse/storage/databases/main/schema/delta/59/05cache_invalidation.sql b/synapse/storage/databases/main/schema/delta/59/05cache_invalidation.sql
new file mode 100644
index 0000000000..fb71b360a0
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/59/05cache_invalidation.sql
@@ -0,0 +1,17 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This is no longer used and was only kept until we bumped the schema version.
+DROP TABLE IF EXISTS cache_invalidation_stream;

diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index 9f120d3cb6..74da9c49f2 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -255,16 +255,6 @@ class TagsStore(TagsWorkerStore):
             self._account_data_stream_cache.entity_has_changed, user_id, next_id
         )
 
-        # Note: This is only here for backwards compat to allow admins to
-        # roll back to a previous Synapse version. Next time we update the
-        # database version we can remove this table.
-        update_max_id_sql = (
-            "UPDATE account_data_max_stream_id"
-            " SET stream_id = ?"
-            " WHERE stream_id < ?"
-        )
-        txn.execute(update_max_id_sql, (next_id, next_id))
-
         update_sql = (
             "UPDATE room_tags_revisions"
             " SET stream_id = ?"

diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 01efb2cabb..566ea19bae 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -35,9 +35,6 @@ logger = logging.getLogger(__name__)
 
 # Remember to update this number every time a change is made to database
 # schema files, so the users will be informed on server restarts.
-# XXX: If you're about to bump this to 59 (or higher) please create an update
-# that drops the unused `cache_invalidation_stream` table, as per #7436!
-# XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
 SCHEMA_VERSION = 59
 
 dir_path = os.path.abspath(os.path.dirname(__file__))
--
cgit 1.5.1
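One way to see why `account_data_max_stream_id` could be dropped at all: once the schema version bump rules out rolling back to code that reads it, the current maximum stream ID can be recovered at startup from the data tables that already carry a `stream_id` column, which is what the `StreamIdGenerator` constructor change above switches to (`room_account_data` plus `room_tags_revisions`). A rough, hypothetical illustration of that recovery, not Synapse's actual implementation:

    import sqlite3


    def current_max_stream_id(conn: sqlite3.Connection) -> int:
        # Scan each table that records a stream_id and take the overall max;
        # an empty table contributes nothing.
        max_id = 1
        for table in ("room_account_data", "room_tags_revisions"):
            row = conn.execute("SELECT MAX(stream_id) FROM %s" % (table,)).fetchone()
            if row[0] is not None:
                max_id = max(max_id, row[0])
        return max_id


    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE room_account_data (stream_id INTEGER)")
    conn.execute("CREATE TABLE room_tags_revisions (stream_id INTEGER)")
    conn.execute("INSERT INTO room_account_data VALUES (7)")
    assert current_max_stream_id(conn) == 7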