author    | Richard van der Hoff <richard@matrix.org> | 2019-02-14 14:41:40 +0000
committer | Richard van der Hoff <richard@matrix.org> | 2019-02-14 14:41:40 +0000
commit    | 00cf679bf22737b6b9028ba0dada47e029fc0925 (patch)
tree      | 329a57fbbde7507c8715f8fde3f2669bc21beea2 /synapse/storage
parent    | Update MSC1711_certificates_FAQ.md (diff)
parent    | 0.99.1 (diff)
download  | synapse-00cf679bf22737b6b9028ba0dada47e029fc0925.tar.xz
Merge tag 'v0.99.1'
Synapse 0.99.1 (2019-02-14)
===========================

Features
--------

- Include m.room.encryption on invites by default ([\#3902](https://github.com/matrix-org/synapse/issues/3902))
- Federation OpenID listener resource can now be activated even if federation is disabled ([\#4420](https://github.com/matrix-org/synapse/issues/4420))
- Synapse's ACME support will now correctly reprovision a certificate that approaches its expiry while Synapse is running. ([\#4522](https://github.com/matrix-org/synapse/issues/4522))
- Add ability to update backup versions ([\#4580](https://github.com/matrix-org/synapse/issues/4580))
- Allow the "unavailable" presence status for /sync. This change makes Synapse compliant with r0.4.0 of the Client-Server specification. ([\#4592](https://github.com/matrix-org/synapse/issues/4592))
- There is no longer any need to specify `no_tls`: it is inferred from the absence of TLS listeners ([\#4613](https://github.com/matrix-org/synapse/issues/4613), [\#4615](https://github.com/matrix-org/synapse/issues/4615), [\#4617](https://github.com/matrix-org/synapse/issues/4617), [\#4636](https://github.com/matrix-org/synapse/issues/4636))
- The default configuration no longer requires TLS certificates. ([\#4614](https://github.com/matrix-org/synapse/issues/4614))

Bugfixes
--------

- Copy over room federation ability on room upgrade. ([\#4530](https://github.com/matrix-org/synapse/issues/4530))
- Fix noisy "twisted.internet.task.TaskStopped" errors in logs ([\#4546](https://github.com/matrix-org/synapse/issues/4546))
- Synapse is now tolerant of the `tls_fingerprints` option being None or not specified. ([\#4589](https://github.com/matrix-org/synapse/issues/4589))
- Fix 'no unique or exclusion constraint' error ([\#4591](https://github.com/matrix-org/synapse/issues/4591))
- Transfer Server ACLs on room upgrade. ([\#4608](https://github.com/matrix-org/synapse/issues/4608))
- Fix failure to start when no TLS certificate was given, even if TLS was disabled. ([\#4618](https://github.com/matrix-org/synapse/issues/4618))
- Fix self-signed cert notice from generate-config. ([\#4625](https://github.com/matrix-org/synapse/issues/4625))
- Fix performance of `user_ips` table deduplication background update ([\#4626](https://github.com/matrix-org/synapse/issues/4626), [\#4627](https://github.com/matrix-org/synapse/issues/4627))

Internal Changes
----------------

- Change the user directory state query to use a filtered call to the db instead of a generic one. ([\#4462](https://github.com/matrix-org/synapse/issues/4462))
- Reject federation transactions if they include more than 50 PDUs or 100 EDUs. ([\#4513](https://github.com/matrix-org/synapse/issues/4513))
- Reduce duplication of ``synapse.app`` code. ([\#4567](https://github.com/matrix-org/synapse/issues/4567))
- Fix docker upload job to push -py2 images. ([\#4576](https://github.com/matrix-org/synapse/issues/4576))
- Add port configuration information to ACME instructions. ([\#4578](https://github.com/matrix-org/synapse/issues/4578))
- Update MSC1711 FAQ to clarify .well-known usage ([\#4584](https://github.com/matrix-org/synapse/issues/4584))
- Clean up default listener configuration ([\#4586](https://github.com/matrix-org/synapse/issues/4586))
- Clarifications for reverse proxy docs ([\#4607](https://github.com/matrix-org/synapse/issues/4607))
- Move ClientTLSOptionsFactory init out of `refresh_certificates` ([\#4611](https://github.com/matrix-org/synapse/issues/4611))
- Fail cleanly if listener config lacks a 'port' ([\#4616](https://github.com/matrix-org/synapse/issues/4616))
- Remove redundant entries from docker config ([\#4619](https://github.com/matrix-org/synapse/issues/4619))
- README updates ([\#4621](https://github.com/matrix-org/synapse/issues/4621))
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/_base.py                           | 27
-rw-r--r-- | synapse/storage/client_ips.py                      | 87
-rw-r--r-- | synapse/storage/e2e_room_keys.py                   | 21
-rw-r--r-- | synapse/storage/schema/delta/53/user_ips_index.sql | 10
-rw-r--r-- | synapse/storage/state.py                           | 38
-rw-r--r-- | synapse/storage/user_directory.py                  | 16
6 files changed, 172 insertions, 27 deletions
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 4872ff55b6..e124161845 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -50,6 +50,21 @@ sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) +# Unique indexes which have been added in background updates. Maps from table name +# to the name of the background update which added the unique index to that table. +# +# This is used by the upsert logic to figure out which tables are safe to do a proper +# UPSERT on: until the relevant background update has completed, we +# have to emulate an upsert by locking the table. +# +UNIQUE_INDEX_BACKGROUND_UPDATES = { + "user_ips": "user_ips_device_unique_index", + "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx", + "device_lists_remote_cache": "device_lists_remote_cache_unique_idx", + "event_search": "event_search_event_id_idx", +} + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() @@ -194,7 +209,7 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine # A set of tables that are not safe to use native upserts in. - self._unsafe_to_upsert_tables = {"user_ips"} + self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) # We add the user_directory_search table to the blacklist on SQLite # because the existing search table does not have an index, making it @@ -230,12 +245,12 @@ class SQLBaseStore(object): ) updates = [x["update_name"] for x in updates] - # The User IPs table in schema #53 was missing a unique index, which we - # run as a background update. - if "user_ips_device_unique_index" not in updates: - self._unsafe_to_upsert_tables.discard("user_ips") + for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items(): + if update_name not in updates: + logger.debug("Now safe to upsert in %s", table) + self._unsafe_to_upsert_tables.discard(table) - # If there's any tables left to check, reschedule to run. + # If there's any updates still running, reschedule to run. if updates: self._clock.call_later( 15.0, diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 091d7116c5..9c21362226 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + + self.register_background_update_handler( "user_ips_remove_dupes", self._remove_user_ip_dupes, ) @@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): defer.returnValue(1) @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. 
+ # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + + @defer.inlineCallbacks def _remove_user_ip_dupes(self, progress, batch_size): # This works function works by scanning the user_ips table in batches # based on `last_seen`. For each row in a batch it searches the rest of @@ -167,12 +191,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): clause = "? <= last_seen AND last_seen < ?" args = (begin_last_seen, end_last_seen) + # (Note: The DISTINCT in the inner query is important to ensure that + # the COUNT(*) is accurate, otherwise double counting may happen due + # to the join effectively being a cross product) txn.execute( """ SELECT user_id, access_token, ip, - MAX(device_id), MAX(user_agent), MAX(last_seen) + MAX(device_id), MAX(user_agent), MAX(last_seen), + COUNT(*) FROM ( - SELECT user_id, access_token, ip + SELECT DISTINCT user_id, access_token, ip FROM user_ips WHERE {} ) c @@ -186,7 +214,60 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): # We've got some duplicates for i in res: - user_id, access_token, ip, device_id, user_agent, last_seen = i + user_id, access_token, ip, device_id, user_agent, last_seen, count = i + + # We want to delete the duplicates so we end up with only a + # single row. + # + # The naive way of doing this would be just to delete all rows + # and reinsert a constructed row. However, if there are a lot of + # duplicate rows this can cause the table to grow a lot, which + # can be problematic in two ways: + # 1. If user_ips is already large then this can cause the + # table to rapidly grow, potentially filling the disk. + # 2. Reinserting a lot of rows can confuse the table + # statistics for postgres, causing it to not use the + # correct indices for the query above, resulting in a full + # table scan. This is incredibly slow for large tables and + # can kill database performance. (This seems to mainly + # happen for the last query where the clause is simply `? < + # last_seen`) + # + # So instead we want to delete all but *one* of the duplicate + # rows. That is hard to do reliably, so we cheat and do a two + # step process: + # 1. Delete all rows with a last_seen strictly less than the + # max last_seen. This hopefully results in deleting all but + # one row the majority of the time, but there may be + # duplicate last_seen + # 2. If multiple rows remain, we fall back to the naive method + # and simply delete all rows and reinsert. + # + # Note that this relies on no new duplicate rows being inserted, + # but if that is happening then this entire process is futile + # anyway. + + # Do step 1: + + txn.execute( + """ + DELETE FROM user_ips + WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ? + """, + (user_id, access_token, ip, last_seen) + ) + if txn.rowcount == count - 1: + # We deleted all but one of the duplicate rows, i.e. there + # is exactly one remaining and so there is nothing left to + # do. 
+ continue + elif txn.rowcount >= count: + raise Exception( + "We deleted more duplicate rows from 'user_ips' than expected", + ) + + # The previous step didn't delete enough rows, so we fallback to + # step 2: # Drop all the duplicates txn.execute( diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 45cebe61d1..9a3aec759e 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -298,6 +298,27 @@ class EndToEndRoomKeyStore(SQLBaseStore): "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn ) + def update_e2e_room_keys_version(self, user_id, version, info): + """Update a given backup version + + Args: + user_id(str): the user whose backup version we're updating + version(str): the version ID of the backup version we're updating + info(dict): the new backup version info to store + """ + + return self._simple_update( + table="e2e_room_keys_versions", + keyvalues={ + "user_id": user_id, + "version": version, + }, + updatevalues={ + "auth_data": json.dumps(info["auth_data"]), + }, + desc="update_e2e_room_keys_version" + ) + def delete_e2e_room_keys_version(self, user_id, version=None): """Delete a given backup version of the user's room keys. Doesn't delete their actual key data. diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql index 4ca346c111..b812c5794f 100644 --- a/synapse/storage/schema/delta/53/user_ips_index.sql +++ b/synapse/storage/schema/delta/53/user_ips_index.sql @@ -13,9 +13,13 @@ * limitations under the License. */ --- delete duplicates + -- analyze user_ips, to help ensure the correct indices are used INSERT INTO background_updates (update_name, progress_json) VALUES - ('user_ips_remove_dupes', '{}'); + ('user_ips_analyze', '{}'); + +-- delete duplicates +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_remove_dupes', '{}', 'user_ips_analyze'); -- add a new unique index to user_ips table INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES @@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES -- drop the old original index INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); \ No newline at end of file + ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c3ab7db7ae..d14a7b2538 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -428,13 +428,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): """ # for now we do this by looking at the create event. We may want to cache this # more intelligently in future. 
- state_ids = yield self.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) - - if not create_id: - raise NotFoundError("Unknown room %s" % (room_id)) - create_event = yield self.get_event(create_id) + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) defer.returnValue(create_event.content.get("room_version", "1")) @defer.inlineCallbacks @@ -447,19 +443,39 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[unicode|None]: predecessor room id + + Raises: + NotFoundError if the room is unknown + """ + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) + + # Return predecessor if present + defer.returnValue(create_event.content.get("predecessor", None)) + + @defer.inlineCallbacks + def get_create_event_for_room(self, room_id): + """Get the create state event for a room. + + Args: + room_id (str) + + Returns: + Deferred[EventBase]: The room creation event. + + Raises: + NotFoundError if the room is unknown """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) # If we can't find the create event, assume we've hit a dead end if not create_id: - defer.returnValue(None) + raise NotFoundError("Unknown room %s" % (room_id)) - # Retrieve the room's create event + # Retrieve the room's create event and return create_event = yield self.get_event(create_id) - - # Return predecessor if present - defer.returnValue(create_event.content.get("predecessor", None)) + defer.returnValue(create_event) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index ce48212265..e8b574ee5e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,6 +22,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -31,12 +32,19 @@ logger = logging.getLogger(__name__) class UserDirectoryStore(SQLBaseStore): - @cachedInlineCallbacks(cache_context=True) - def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): + @defer.inlineCallbacks + def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable """ - current_state_ids = yield self.get_current_state_ids( - room_id, on_invalidate=cache_context.invalidate + + # Create a state filter that only queries join and history state event + types_to_filter = ( + (EventTypes.JoinRules, ""), + (EventTypes.RoomHistoryVisibility, ""), + ) + + current_state_ids = yield self.get_filtered_current_state_ids( + room_id, StateFilter.from_types(types_to_filter), ) join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) |
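The `_base.py` hunk above drives the choice between native and emulated upserts off a map from table name to the background update that adds its unique index. Below is a minimal, standalone sketch of that bookkeeping; the `UpsertSafetyTracker` class and its method names are hypothetical stand-ins for Synapse's real `SQLBaseStore`, and the set of pending update names is assumed to come from the `background_updates` table.

```python
# Sketch of the upsert-safety bookkeeping added in _base.py. This is a
# simplified, hypothetical class, not Synapse's real SQLBaseStore.

UNIQUE_INDEX_BACKGROUND_UPDATES = {
    "user_ips": "user_ips_device_unique_index",
    "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx",
    "device_lists_remote_cache": "device_lists_remote_cache_unique_idx",
    "event_search": "event_search_event_id_idx",
}


class UpsertSafetyTracker:
    def __init__(self):
        # Until the relevant background update has completed, an upsert on
        # these tables has to be emulated by locking the table.
        self.unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES)

    def check_background_updates(self, pending_update_names):
        """Mark tables as upsert-safe once their unique-index update is done.

        Args:
            pending_update_names (set[str]): names of background updates that
                are still outstanding (e.g. read from the background_updates
                table).

        Returns:
            bool: True if any updates remain, in which case the caller should
                reschedule this check (the real code retries after 15s).
        """
        for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items():
            if update_name not in pending_update_names:
                self.unsafe_to_upsert_tables.discard(table)
        return bool(pending_update_names)


tracker = UpsertSafetyTracker()
tracker.check_background_updates({"user_ips_device_unique_index"})
assert "user_ips" in tracker.unsafe_to_upsert_tables
assert "event_search" not in tracker.unsafe_to_upsert_tables
```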
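The new `user_ips_analyze` step in the schema delta is queued ahead of `user_ips_remove_dupes` via `depends_on`, so table statistics are refreshed before the deduplication queries run. Here is a rough sketch of that ordering, using `sqlite3` only to keep the example runnable; the tiny scheduler below is invented for illustration and is far simpler than Synapse's real `BackgroundUpdateStore`.

```python
# Illustrative-only background-update ordering: "user_ips_analyze" must
# complete before "user_ips_remove_dupes" is allowed to start.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE user_ips (user_id TEXT, access_token TEXT, ip TEXT, last_seen INTEGER)"
)

# update_name -> (depends_on, handler)
background_updates = {}


def register(update_name, handler, depends_on=None):
    background_updates[update_name] = (depends_on, handler)


def run_all():
    done = set()
    while len(done) < len(background_updates):
        progressed = False
        for name, (depends_on, handler) in background_updates.items():
            if name in done or (depends_on and depends_on not in done):
                continue
            handler(conn)
            done.add(name)
            progressed = True
        if not progressed:
            raise RuntimeError("unsatisfiable depends_on chain")


def user_ips_analyze(db):
    # Refresh table statistics so the dedup queries pick sensible indices.
    db.execute("ANALYZE user_ips")


def user_ips_remove_dupes(db):
    # Placeholder for the batched deduplication shown in client_ips.py.
    pass


register("user_ips_analyze", user_ips_analyze)
register("user_ips_remove_dupes", user_ips_remove_dupes, depends_on="user_ips_analyze")
run_all()
```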
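The long comment in the `client_ips.py` hunk explains the two-step delete: first remove every duplicate strictly older than the newest `last_seen`, and only fall back to delete-and-reinsert when several rows tie for the newest timestamp. A self-contained `sqlite3` sketch of that strategy follows, assuming a simplified `user_ips` schema; the real code runs inside Synapse's transaction helpers and additionally raises if it deletes more rows than the earlier `COUNT(*)` predicted.

```python
# Standalone sketch of the two-step duplicate removal for one
# (user_id, access_token, ip) tuple; schema and data are simplified.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE user_ips (user_id TEXT, access_token TEXT, ip TEXT,"
    " device_id TEXT, user_agent TEXT, last_seen INTEGER)"
)
rows = [
    ("@a:hs", "tok", "1.2.3.4", "DEV1", "UA", 100),
    ("@a:hs", "tok", "1.2.3.4", "DEV1", "UA", 200),
    ("@a:hs", "tok", "1.2.3.4", "DEV2", "UA", 200),  # ties on last_seen
]
conn.executemany("INSERT INTO user_ips VALUES (?, ?, ?, ?, ?, ?)", rows)


def dedupe(db, user_id, access_token, ip):
    device_id, user_agent, last_seen, count = db.execute(
        "SELECT MAX(device_id), MAX(user_agent), MAX(last_seen), COUNT(*)"
        " FROM user_ips WHERE user_id = ? AND access_token = ? AND ip = ?",
        (user_id, access_token, ip),
    ).fetchone()

    # Step 1: delete everything strictly older than the newest row. Most of
    # the time this leaves exactly one row and nothing is re-inserted, so the
    # table does not bloat and the planner statistics stay sane.
    cur = db.execute(
        "DELETE FROM user_ips"
        " WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ?",
        (user_id, access_token, ip, last_seen),
    )
    if cur.rowcount == count - 1:
        return  # exactly one row left, nothing more to do

    # Step 2: several rows share the newest last_seen, so fall back to the
    # naive approach: delete them all and re-insert a single constructed row.
    db.execute(
        "DELETE FROM user_ips WHERE user_id = ? AND access_token = ? AND ip = ?",
        (user_id, access_token, ip),
    )
    db.execute(
        "INSERT INTO user_ips VALUES (?, ?, ?, ?, ?, ?)",
        (user_id, access_token, ip, device_id, user_agent, last_seen),
    )


dedupe(conn, "@a:hs", "tok", "1.2.3.4")
assert conn.execute("SELECT COUNT(*) FROM user_ips").fetchone()[0] == 1
```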
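The new `update_e2e_room_keys_version` in `e2e_room_keys.py` boils down to a keyed `UPDATE` that replaces the stored `auth_data` JSON for a backup version. The sketch below approximates what the `_simple_update` call expands to; the `simple_update` helper here is illustrative, not Synapse's actual implementation, and `sqlite3` is used only so the example can run.

```python
# Hedged approximation of update_e2e_room_keys_version as a plain UPDATE.
import json
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE e2e_room_keys_versions ("
    " user_id TEXT, version TEXT, algorithm TEXT, auth_data TEXT)"
)
conn.execute(
    "INSERT INTO e2e_room_keys_versions VALUES (?, ?, ?, ?)",
    ("@a:hs", "1", "m.megolm_backup.v1", json.dumps({"public_key": "old"})),
)


def simple_update(db, table, keyvalues, updatevalues):
    # Column and table names here are trusted constants, not user input.
    sets = ", ".join("%s = ?" % k for k in updatevalues)
    where = " AND ".join("%s = ?" % k for k in keyvalues)
    sql = "UPDATE %s SET %s WHERE %s" % (table, sets, where)
    db.execute(sql, list(updatevalues.values()) + list(keyvalues.values()))


def update_e2e_room_keys_version(db, user_id, version, info):
    simple_update(
        db,
        table="e2e_room_keys_versions",
        keyvalues={"user_id": user_id, "version": version},
        updatevalues={"auth_data": json.dumps(info["auth_data"])},
    )


update_e2e_room_keys_version(conn, "@a:hs", "1", {"auth_data": {"public_key": "new"}})
row = conn.execute(
    "SELECT auth_data FROM e2e_room_keys_versions WHERE user_id = ? AND version = ?",
    ("@a:hs", "1"),
).fetchone()
assert json.loads(row[0])["public_key"] == "new"
```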
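The `state.py` change extracts a shared `get_create_event_for_room` helper so that both `get_room_version` and `get_room_predecessor` get the same `NotFoundError` behaviour for unknown rooms. Below is a synchronous, in-memory sketch of that shape; the fake store is not the real `StateGroupWorkerStore`, and the real methods are Twisted `inlineCallbacks` coroutines returning Deferreds.

```python
# Simplified, synchronous view of the refactor: one helper, two callers.


class NotFoundError(Exception):
    pass


class FakeStateStore:
    def __init__(self, create_events):
        # room_id -> content dict of the room's m.room.create event
        self._create_events = create_events

    def get_create_event_for_room(self, room_id):
        """Return the create event content, or raise NotFoundError."""
        if room_id not in self._create_events:
            raise NotFoundError("Unknown room %s" % (room_id,))
        return self._create_events[room_id]

    def get_room_version(self, room_id):
        create = self.get_create_event_for_room(room_id)
        return create.get("room_version", "1")

    def get_room_predecessor(self, room_id):
        create = self.get_create_event_for_room(room_id)
        return create.get("predecessor", None)


store = FakeStateStore({
    "!new:hs": {"room_version": "4", "predecessor": {"room_id": "!old:hs"}},
})
assert store.get_room_version("!new:hs") == "4"
assert store.get_room_predecessor("!new:hs")["room_id"] == "!old:hs"
```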
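Finally, the `user_directory.py` change narrows the state query to just the join-rules and history-visibility events via `StateFilter.from_types`, instead of pulling the room's full current state. The sketch below mimics that filtering with plain dicts; `filter_state` is a stand-in for the real `StateFilter`, and the publicly-joinable check is a simplified reading of the surrounding code.

```python
# Dict-based illustration of filtering current state down to two event types.

JOIN_RULES = "m.room.join_rules"
HISTORY_VISIBILITY = "m.room.history_visibility"


def filter_state(full_state_ids, types_to_filter):
    """Keep only the (event_type, state_key) pairs the caller asked for."""
    wanted = set(types_to_filter)
    return {k: v for k, v in full_state_ids.items() if k in wanted}


def is_world_readable_or_publicly_joinable(filtered_state_ids, get_event):
    join_rules_id = filtered_state_ids.get((JOIN_RULES, ""))
    if join_rules_id:
        ev = get_event(join_rules_id)
        if ev["content"].get("join_rule") == "public":
            return True
    history_id = filtered_state_ids.get((HISTORY_VISIBILITY, ""))
    if history_id:
        ev = get_event(history_id)
        if ev["content"].get("history_visibility") == "world_readable":
            return True
    return False


full_state = {
    (JOIN_RULES, ""): "$join",
    (HISTORY_VISIBILITY, ""): "$vis",
    ("m.room.member", "@a:hs"): "$member",  # dropped by the filter
}
events = {
    "$join": {"content": {"join_rule": "invite"}},
    "$vis": {"content": {"history_visibility": "world_readable"}},
}
filtered = filter_state(full_state, [(JOIN_RULES, ""), (HISTORY_VISIBILITY, "")])
assert ("m.room.member", "@a:hs") not in filtered
assert is_world_readable_or_publicly_joinable(filtered, events.__getitem__)
```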