diff options
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/_base.py | 24 | ||||
-rw-r--r-- | synapse/storage/devices.py | 39 | ||||
-rw-r--r-- | synapse/storage/e2e_room_keys.py | 16 | ||||
-rw-r--r-- | synapse/storage/end_to_end_keys.py | 38 | ||||
-rw-r--r-- | synapse/storage/events.py | 421 | ||||
-rw-r--r-- | synapse/storage/events_worker.py | 213 | ||||
-rw-r--r-- | synapse/storage/prepare_database.py | 24 | ||||
-rw-r--r-- | synapse/storage/pusher.py | 30 | ||||
-rw-r--r-- | synapse/storage/registration.py | 24 | ||||
-rw-r--r-- | synapse/storage/room.py | 35 | ||||
-rw-r--r-- | synapse/storage/schema/delta/56/add_spans_to_device_lists.sql | 20 | ||||
-rw-r--r-- | synapse/storage/schema/delta/56/drop_unused_event_tables.sql | 20 | ||||
-rw-r--r-- | synapse/storage/schema/delta/56/fix_room_keys_index.sql | 18 | ||||
-rw-r--r-- | synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql | 17 |
14 files changed, 632 insertions, 307 deletions
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 489ce82fae..abe16334ec 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1395,14 +1395,22 @@ class SQLBaseStore(object): """ txn.call_after(self._invalidate_state_caches, room_id, members_changed) - # We need to be careful that the size of the `members_changed` list - # isn't so large that it causes problems sending over replication, so we - # send them in chunks. - # Max line length is 16K, and max user ID length is 255, so 50 should - # be safe. - for chunk in batch_iter(members_changed, 50): - keys = itertools.chain([room_id], chunk) - self._send_invalidation_to_replication(txn, _CURRENT_STATE_CACHE_NAME, keys) + if members_changed: + # We need to be careful that the size of the `members_changed` list + # isn't so large that it causes problems sending over replication, so we + # send them in chunks. + # Max line length is 16K, and max user ID length is 255, so 50 should + # be safe. + for chunk in batch_iter(members_changed, 50): + keys = itertools.chain([room_id], chunk) + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, keys + ) + else: + # if no members changed, we still need to invalidate the other caches. + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, [room_id] + ) def _invalidate_state_caches(self, room_id, members_changed): """Invalidates caches that are based on the current state, but does diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 991e28ea24..f7716302f0 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -23,6 +23,11 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import Codes, StoreError +from synapse.logging.opentracing import ( + get_active_span_text_map, + trace, + whitelisted_homeserver, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore @@ -77,6 +82,7 @@ class DeviceWorkerStore(SQLBaseStore): return {d["device_id"]: d for d in devices} + @trace @defer.inlineCallbacks def get_devices_by_remote(self, destination, from_stream_id, limit): """Get stream of updates to send to remote servers @@ -131,8 +137,15 @@ class DeviceWorkerStore(SQLBaseStore): # (user_id, device_id) entries into a map, with the value being # the max stream_id across each set of duplicate entries # - # maps (user_id, device_id) -> stream_id + # maps (user_id, device_id) -> (stream_id, opentracing_context) # as long as their stream_id does not match that of the last row + # + # opentracing_context contains the opentracing metadata for the request + # that created the poke + # + # The most recent request's opentracing_context is used as the + # context which created the Edu. + query_map = {} for update in updates: if stream_id_cutoff is not None and update[2] >= stream_id_cutoff: @@ -140,7 +153,14 @@ class DeviceWorkerStore(SQLBaseStore): break key = (update[0], update[1]) - query_map[key] = max(query_map.get(key, 0), update[2]) + + update_context = update[3] + update_stream_id = update[2] + + previous_update_stream_id, _ = query_map.get(key, (0, None)) + + if update_stream_id > previous_update_stream_id: + query_map[key] = (update_stream_id, update_context) # If we didn't find any updates with a stream_id lower than the cutoff, it # means that there are more than limit updates all of which have the same @@ -175,7 +195,7 @@ class DeviceWorkerStore(SQLBaseStore): List: List of device updates """ sql = """ - SELECT user_id, device_id, stream_id FROM device_lists_outbound_pokes + SELECT user_id, device_id, stream_id, opentracing_context FROM device_lists_outbound_pokes WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? ORDER BY stream_id LIMIT ? @@ -191,8 +211,9 @@ class DeviceWorkerStore(SQLBaseStore): Args: destination (str): The host the device updates are intended for from_stream_id (int): The minimum stream_id to filter updates by, exclusive - query_map (Dict[(str, str): int]): Dictionary mapping - user_id/device_id to update stream_id + query_map (Dict[(str, str): (int, str|None)]): Dictionary mapping + user_id/device_id to update stream_id and the relevent json-encoded + opentracing context Returns: List[Dict]: List of objects representing an device update EDU @@ -214,12 +235,13 @@ class DeviceWorkerStore(SQLBaseStore): destination, user_id, from_stream_id ) for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] + stream_id, opentracing_context = query_map[(user_id, device_id)] result = { "user_id": user_id, "device_id": device_id, "prev_id": [prev_id] if prev_id else [], "stream_id": stream_id, + "org.matrix.opentracing_context": opentracing_context, } prev_id = stream_id @@ -834,6 +856,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): ], ) + context = get_active_span_text_map() + self._simple_insert_many_txn( txn, table="device_lists_outbound_pokes", @@ -845,6 +869,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "device_id": device_id, "sent": False, "ts": now, + "opentracing_context": json.dumps(context) + if whitelisted_homeserver(destination) + else None, } for destination in hosts for device_id in device_ids diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 99128f2df7..be2fe2bab6 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -18,6 +18,7 @@ import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.logging.opentracing import log_kv, trace from ._base import SQLBaseStore @@ -82,11 +83,11 @@ class EndToEndRoomKeyStore(SQLBaseStore): table="e2e_room_keys", keyvalues={ "user_id": user_id, + "version": version, "room_id": room_id, "session_id": session_id, }, values={ - "version": version, "first_message_index": room_key["first_message_index"], "forwarded_count": room_key["forwarded_count"], "is_verified": room_key["is_verified"], @@ -94,7 +95,16 @@ class EndToEndRoomKeyStore(SQLBaseStore): }, lock=False, ) + log_kv( + { + "message": "Set room key", + "room_id": room_id, + "session_id": session_id, + "room_key": room_key, + } + ) + @trace @defer.inlineCallbacks def get_e2e_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk get the E2E room keys for a given backup, optionally filtered to a given @@ -153,6 +163,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): return sessions + @trace @defer.inlineCallbacks def delete_e2e_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk delete the E2E room keys for a given backup, optionally filtered to a given @@ -236,6 +247,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): "get_e2e_room_keys_version_info", _get_e2e_room_keys_version_info_txn ) + @trace def create_e2e_room_keys_version(self, user_id, info): """Atomically creates a new version of this user's e2e_room_keys store with the given version info. @@ -276,6 +288,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn ) + @trace def update_e2e_room_keys_version(self, user_id, version, info): """Update a given backup version @@ -292,6 +305,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): desc="update_e2e_room_keys_version", ) + @trace def delete_e2e_room_keys_version(self, user_id, version=None): """Delete a given backup version of the user's room keys. Doesn't delete their actual key data. diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 6f524cedd9..b6f2538e84 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -18,12 +18,14 @@ from canonicaljson import encode_canonical_json from twisted.internet import defer +from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore, db_to_json class EndToEndKeyWorkerStore(SQLBaseStore): + @trace @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, include_deleted_devices=False @@ -40,6 +42,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". """ + set_tag("query_list", query_list) if not query_list: return {} @@ -57,9 +60,13 @@ class EndToEndKeyWorkerStore(SQLBaseStore): return results + @trace def _get_e2e_device_keys_txn( self, txn, query_list, include_all_devices=False, include_deleted_devices=False ): + set_tag("include_all_devices", include_all_devices) + set_tag("include_deleted_devices", include_deleted_devices) + query_clauses = [] query_params = [] @@ -104,6 +111,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for user_id, device_id in deleted_devices: result.setdefault(user_id, {})[device_id] = None + log_kv(result) return result @defer.inlineCallbacks @@ -129,8 +137,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): keyvalues={"user_id": user_id, "device_id": device_id}, desc="add_e2e_one_time_keys_check", ) - - return {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows} + result = {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows} + log_kv({"message": "Fetched one time keys for user", "one_time_keys": result}) + return result @defer.inlineCallbacks def add_e2e_one_time_keys(self, user_id, device_id, time_now, new_keys): @@ -146,6 +155,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): """ def _add_e2e_one_time_keys(txn): + set_tag("user_id", user_id) + set_tag("device_id", device_id) + set_tag("new_keys", new_keys) # We are protected from race between lookup and insertion due to # a unique constraint. If there is a race of two calls to # `add_e2e_one_time_keys` then they'll conflict and we will only @@ -202,6 +214,11 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): """ def _set_e2e_device_keys_txn(txn): + set_tag("user_id", user_id) + set_tag("device_id", device_id) + set_tag("time_now", time_now) + set_tag("device_keys", device_keys) + old_key_json = self._simple_select_one_onecol_txn( txn, table="e2e_device_keys_json", @@ -215,6 +232,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): new_key_json = encode_canonical_json(device_keys).decode("utf-8") if old_key_json == new_key_json: + log_kv({"Message": "Device key already stored."}) return False self._simple_upsert_txn( @@ -223,7 +241,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): keyvalues={"user_id": user_id, "device_id": device_id}, values={"ts_added_ms": time_now, "key_json": new_key_json}, ) - + log_kv({"message": "Device keys stored."}) return True return self.runInteraction("set_e2e_device_keys", _set_e2e_device_keys_txn) @@ -231,6 +249,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def claim_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" + @trace def _claim_e2e_one_time_keys(txn): sql = ( "SELECT key_id, key_json FROM e2e_one_time_keys_json" @@ -252,7 +271,13 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): " AND key_id = ?" ) for user_id, device_id, algorithm, key_id in delete: + log_kv( + { + "message": "Executing claim e2e_one_time_keys transaction on database." + } + ) txn.execute(sql, (user_id, device_id, algorithm, key_id)) + log_kv({"message": "finished executing and invalidating cache"}) self._invalidate_cache_and_stream( txn, self.count_e2e_one_time_keys, (user_id, device_id) ) @@ -262,6 +287,13 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def delete_e2e_keys_by_device(self, user_id, device_id): def delete_e2e_keys_by_device_txn(txn): + log_kv( + { + "message": "Deleting keys for device", + "device_id": device_id, + "user_id": user_id, + } + ) self._simple_delete_txn( txn, table="e2e_device_keys_json", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 88c0180116..5a95c36a8b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -364,147 +364,161 @@ class EventsStore( if not events_and_contexts: return - if backfilled: - stream_ordering_manager = self._backfill_id_gen.get_next_mult( - len(events_and_contexts) - ) - else: - stream_ordering_manager = self._stream_id_gen.get_next_mult( - len(events_and_contexts) - ) - - with stream_ordering_manager as stream_orderings: - for (event, context), stream in zip(events_and_contexts, stream_orderings): - event.internal_metadata.stream_ordering = stream - - chunks = [ - events_and_contexts[x : x + 100] - for x in range(0, len(events_and_contexts), 100) - ] + chunks = [ + events_and_contexts[x : x + 100] + for x in range(0, len(events_and_contexts), 100) + ] - for chunk in chunks: - # We can't easily parallelize these since different chunks - # might contain the same event. :( + for chunk in chunks: + # We can't easily parallelize these since different chunks + # might contain the same event. :( - # NB: Assumes that we are only persisting events for one room - # at a time. + # NB: Assumes that we are only persisting events for one room + # at a time. - # map room_id->list[event_ids] giving the new forward - # extremities in each room - new_forward_extremeties = {} + # map room_id->list[event_ids] giving the new forward + # extremities in each room + new_forward_extremeties = {} - # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events. - # This is simply used to prefill the get_current_state_ids - # cache - current_state_for_room = {} + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events. + # This is simply used to prefill the get_current_state_ids + # cache + current_state_for_room = {} - # map room_id->(to_delete, to_insert) where to_delete is a list - # of type/state keys to remove from current state, and to_insert - # is a map (type,key)->event_id giving the state delta in each - # room - state_delta_for_room = {} + # map room_id->(to_delete, to_insert) where to_delete is a list + # of type/state keys to remove from current state, and to_insert + # is a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} - if not backfilled: - with Measure(self._clock, "_calculate_state_and_extrem"): - # Work out the new "current state" for each room. - # We do this by working out what the new extremities are and then - # calculating the state from that. - events_by_room = {} - for event, context in chunk: - events_by_room.setdefault(event.room_id, []).append( - (event, context) - ) - - for room_id, ev_ctx_rm in iteritems(events_by_room): - latest_event_ids = yield self.get_latest_event_ids_in_room( - room_id - ) - new_latest_event_ids = yield self._calculate_new_extremities( - room_id, ev_ctx_rm, latest_event_ids + if not backfilled: + with Measure(self._clock, "_calculate_state_and_extrem"): + # Work out the new "current state" for each room. + # We do this by working out what the new extremities are and then + # calculating the state from that. + events_by_room = {} + for event, context in chunk: + events_by_room.setdefault(event.room_id, []).append( + (event, context) + ) + + for room_id, ev_ctx_rm in iteritems(events_by_room): + latest_event_ids = yield self.get_latest_event_ids_in_room( + room_id + ) + new_latest_event_ids = yield self._calculate_new_extremities( + room_id, ev_ctx_rm, latest_event_ids + ) + + latest_event_ids = set(latest_event_ids) + if new_latest_event_ids == latest_event_ids: + # No change in extremities, so no change in state + continue + + # there should always be at least one forward extremity. + # (except during the initial persistence of the send_join + # results, in which case there will be no existing + # extremities, so we'll `continue` above and skip this bit.) + assert new_latest_event_ids, "No forward extremities left!" + + new_forward_extremeties[room_id] = new_latest_event_ids + + len_1 = ( + len(latest_event_ids) == 1 + and len(new_latest_event_ids) == 1 + ) + if len_1: + all_single_prev_not_state = all( + len(event.prev_event_ids()) == 1 + and not event.is_state() + for event, ctx in ev_ctx_rm ) - - latest_event_ids = set(latest_event_ids) - if new_latest_event_ids == latest_event_ids: - # No change in extremities, so no change in state + # Don't bother calculating state if they're just + # a long chain of single ancestor non-state events. + if all_single_prev_not_state: continue - # there should always be at least one forward extremity. - # (except during the initial persistence of the send_join - # results, in which case there will be no existing - # extremities, so we'll `continue` above and skip this bit.) - assert new_latest_event_ids, "No forward extremities left!" - - new_forward_extremeties[room_id] = new_latest_event_ids - - len_1 = ( - len(latest_event_ids) == 1 - and len(new_latest_event_ids) == 1 + state_delta_counter.inc() + if len(new_latest_event_ids) == 1: + state_delta_single_event_counter.inc() + + # This is a fairly handwavey check to see if we could + # have guessed what the delta would have been when + # processing one of these events. + # What we're interested in is if the latest extremities + # were the same when we created the event as they are + # now. When this server creates a new event (as opposed + # to receiving it over federation) it will use the + # forward extremities as the prev_events, so we can + # guess this by looking at the prev_events and checking + # if they match the current forward extremities. + for ev, _ in ev_ctx_rm: + prev_event_ids = set(ev.prev_event_ids()) + if latest_event_ids == prev_event_ids: + state_delta_reuse_delta_counter.inc() + break + + logger.info("Calculating state delta for room %s", room_id) + with Measure( + self._clock, "persist_events.get_new_state_after_events" + ): + res = yield self._get_new_state_after_events( + room_id, + ev_ctx_rm, + latest_event_ids, + new_latest_event_ids, ) - if len_1: - all_single_prev_not_state = all( - len(event.prev_event_ids()) == 1 - and not event.is_state() - for event, ctx in ev_ctx_rm - ) - # Don't bother calculating state if they're just - # a long chain of single ancestor non-state events. - if all_single_prev_not_state: - continue - - state_delta_counter.inc() - if len(new_latest_event_ids) == 1: - state_delta_single_event_counter.inc() - - # This is a fairly handwavey check to see if we could - # have guessed what the delta would have been when - # processing one of these events. - # What we're interested in is if the latest extremities - # were the same when we created the event as they are - # now. When this server creates a new event (as opposed - # to receiving it over federation) it will use the - # forward extremities as the prev_events, so we can - # guess this by looking at the prev_events and checking - # if they match the current forward extremities. - for ev, _ in ev_ctx_rm: - prev_event_ids = set(ev.prev_event_ids()) - if latest_event_ids == prev_event_ids: - state_delta_reuse_delta_counter.inc() - break - - logger.info("Calculating state delta for room %s", room_id) + current_state, delta_ids = res + + # If either are not None then there has been a change, + # and we need to work out the delta (or use that + # given) + if delta_ids is not None: + # If there is a delta we know that we've + # only added or replaced state, never + # removed keys entirely. + state_delta_for_room[room_id] = ([], delta_ids) + elif current_state is not None: with Measure( - self._clock, "persist_events.get_new_state_after_events" + self._clock, "persist_events.calculate_state_delta" ): - res = yield self._get_new_state_after_events( - room_id, - ev_ctx_rm, - latest_event_ids, - new_latest_event_ids, + delta = yield self._calculate_state_delta( + room_id, current_state ) - current_state, delta_ids = res - - # If either are not None then there has been a change, - # and we need to work out the delta (or use that - # given) - if delta_ids is not None: - # If there is a delta we know that we've - # only added or replaced state, never - # removed keys entirely. - state_delta_for_room[room_id] = ([], delta_ids) - elif current_state is not None: - with Measure( - self._clock, "persist_events.calculate_state_delta" - ): - delta = yield self._calculate_state_delta( - room_id, current_state - ) - state_delta_for_room[room_id] = delta - - # If we have the current_state then lets prefill - # the cache with it. - if current_state is not None: - current_state_for_room[room_id] = current_state + state_delta_for_room[room_id] = delta + + # If we have the current_state then lets prefill + # the cache with it. + if current_state is not None: + current_state_for_room[room_id] = current_state + + # We want to calculate the stream orderings as late as possible, as + # we only notify after all events with a lesser stream ordering have + # been persisted. I.e. if we spend 10s inside the with block then + # that will delay all subsequent events from being notified about. + # Hence why we do it down here rather than wrapping the entire + # function. + # + # Its safe to do this after calculating the state deltas etc as we + # only need to protect the *persistence* of the events. This is to + # ensure that queries of the form "fetch events since X" don't + # return events and stream positions after events that are still in + # flight, as otherwise subsequent requests "fetch event since Y" + # will not return those events. + # + # Note: Multiple instances of this function cannot be in flight at + # the same time for the same room. + if backfilled: + stream_ordering_manager = self._backfill_id_gen.get_next_mult( + len(chunk) + ) + else: + stream_ordering_manager = self._stream_id_gen.get_next_mult(len(chunk)) + + with stream_ordering_manager as stream_orderings: + for (event, context), stream in zip(chunk, stream_orderings): + event.internal_metadata.stream_ordering = stream yield self.runInteraction( "persist_events", @@ -1288,15 +1302,11 @@ class EventsStore( "event_reference_hashes", "event_search", "event_to_state_groups", - "guest_access", - "history_visibility", "local_invites", - "room_names", "state_events", "rejections", "redactions", "room_memberships", - "topics", ): txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -1440,10 +1450,10 @@ class EventsStore( for event, _ in events_and_contexts: if event.type == EventTypes.Name: - # Insert into the room_names and event_search tables. + # Insert into the event_search table. self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: - # Insert into the topics table and event_search table. + # Insert into the event_search table. self._store_room_topic_txn(txn, event) elif event.type == EventTypes.Message: # Insert into the event_search table. @@ -1451,12 +1461,6 @@ class EventsStore( elif event.type == EventTypes.Redaction: # Insert into the redactions table. self._store_redaction(txn, event) - elif event.type == EventTypes.RoomHistoryVisibility: - # Insert into the event_search table. - self._store_history_visibility_txn(txn, event) - elif event.type == EventTypes.GuestAccess: - # Insert into the event_search table. - self._store_guest_access_txn(txn, event) self._handle_event_relations(txn, event) @@ -2177,6 +2181,143 @@ class EventsStore( return to_delete, to_dedelta + def purge_room(self, room_id): + """Deletes all record of a room + + Args: + room_id (str): + """ + + return self.runInteraction("purge_room", self._purge_room_txn, room_id) + + def _purge_room_txn(self, txn, room_id): + # first we have to delete the state groups states + logger.info("[purge] removing %s from state_groups_state", room_id) + + txn.execute( + """ + DELETE FROM state_groups_state WHERE state_group IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # ... and the state group edges + logger.info("[purge] removing %s from state_group_edges", room_id) + + txn.execute( + """ + DELETE FROM state_group_edges WHERE state_group IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # ... and the state groups + logger.info("[purge] removing %s from state_groups", room_id) + + txn.execute( + """ + DELETE FROM state_groups WHERE id IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # and then tables which lack an index on room_id but have one on event_id + for table in ( + "event_auth", + "event_edges", + "event_push_actions_staging", + "event_reference_hashes", + "event_relations", + "event_to_state_groups", + "redactions", + "rejections", + "state_events", + ): + logger.info("[purge] removing %s from %s", room_id, table) + + txn.execute( + """ + DELETE FROM %s WHERE event_id IN ( + SELECT event_id FROM events WHERE room_id=? + ) + """ + % (table,), + (room_id,), + ) + + # and finally, the tables with an index on room_id (or no useful index) + for table in ( + "current_state_events", + "event_backward_extremities", + "event_forward_extremities", + "event_json", + "event_push_actions", + "event_search", + "events", + "group_rooms", + "public_room_list_stream", + "receipts_graph", + "receipts_linearized", + "room_aliases", + "room_depth", + "room_memberships", + "room_state", + "room_stats", + "room_stats_earliest_token", + "rooms", + "stream_ordering_to_exterm", + "topics", + "users_in_public_rooms", + "users_who_share_private_rooms", + # no useful index, but let's clear them anyway + "appservice_room_list", + "e2e_room_keys", + "event_push_summary", + "pusher_throttle", + "group_summary_rooms", + "local_invites", + "room_account_data", + "room_tags", + ): + logger.info("[purge] removing %s from %s", room_id, table) + txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,)) + + # Other tables we do NOT need to clear out: + # + # - blocked_rooms + # This is important, to make sure that we don't accidentally rejoin a blocked + # room after it was purged + # + # - user_directory + # This has a room_id column, but it is unused + # + + # Other tables that we might want to consider clearing out include: + # + # - event_reports + # Given that these are intended for abuse management my initial + # inclination is to leave them in place. + # + # - current_state_delta_stream + # - ex_outlier_stream + # - room_tags_revisions + # The problem with these is that they are largeish and there is no room_id + # index on them. In any case we should be clearing out 'stream' tables + # periodically anyway (#5888) + + # TODO: we could probably usefully do a bunch of cache invalidation here + + logger.info("[purge] done") + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 79680ee856..c6fa7f82fd 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -29,12 +29,7 @@ from synapse.api.room_versions import EventFormatVersions from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event -from synapse.logging.context import ( - LoggingContext, - PreserveLoggingContext, - make_deferred_yieldable, - run_in_background, -) +from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from synapse.util import batch_iter @@ -342,13 +337,12 @@ class EventsWorkerStore(SQLBaseStore): log_ctx = LoggingContext.current_context() log_ctx.record_event_fetch(len(missing_events_ids)) - # Note that _enqueue_events is also responsible for turning db rows + # Note that _get_events_from_db is also responsible for turning db rows # into FrozenEvents (via _get_event_from_row), which involves seeing if # the events have been redacted, and if so pulling the redaction event out # of the database to check it. # - # _enqueue_events is a bit of a rubbish name but naming is hard. - missing_events = yield self._enqueue_events( + missing_events = yield self._get_events_from_db( missing_events_ids, allow_rejected=allow_rejected ) @@ -421,28 +415,28 @@ class EventsWorkerStore(SQLBaseStore): The fetch requests. Each entry consists of a list of event ids to be fetched, and a deferred to be completed once the events have been fetched. + + The deferreds are callbacked with a dictionary mapping from event id + to event row. Note that it may well contain additional events that + were not part of this request. """ with Measure(self._clock, "_fetch_event_list"): try: - event_id_lists = list(zip(*event_list))[0] - event_ids = [item for sublist in event_id_lists for item in sublist] + events_to_fetch = set( + event_id for events, _ in event_list for event_id in events + ) row_dict = self._new_transaction( - conn, "do_fetch", [], [], self._fetch_event_rows, event_ids + conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch ) # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([res[i] for i in ids if i in res]) - except Exception: - logger.exception("Failed to callback") + def fire(): + for _, d in event_list: + d.callback(row_dict) with PreserveLoggingContext(): - self.hs.get_reactor().callFromThread(fire, event_list, row_dict) + self.hs.get_reactor().callFromThread(fire) except Exception as e: logger.exception("do_fetch") @@ -457,13 +451,98 @@ class EventsWorkerStore(SQLBaseStore): self.hs.get_reactor().callFromThread(fire, event_list, e) @defer.inlineCallbacks - def _enqueue_events(self, events, allow_rejected=False): + def _get_events_from_db(self, event_ids, allow_rejected=False): + """Fetch a bunch of events from the database. + + Returned events will be added to the cache for future lookups. + + Args: + event_ids (Iterable[str]): The event_ids of the events to fetch + allow_rejected (bool): Whether to include rejected events + + Returns: + Deferred[Dict[str, _EventCacheEntry]]: + map from event id to result. May return extra events which + weren't asked for. + """ + fetched_events = {} + events_to_fetch = event_ids + + while events_to_fetch: + row_map = yield self._enqueue_events(events_to_fetch) + + # we need to recursively fetch any redactions of those events + redaction_ids = set() + for event_id in events_to_fetch: + row = row_map.get(event_id) + fetched_events[event_id] = row + if row: + redaction_ids.update(row["redactions"]) + + events_to_fetch = redaction_ids.difference(fetched_events.keys()) + if events_to_fetch: + logger.debug("Also fetching redaction events %s", events_to_fetch) + + # build a map from event_id to EventBase + event_map = {} + for event_id, row in fetched_events.items(): + if not row: + continue + assert row["event_id"] == event_id + + rejected_reason = row["rejected_reason"] + + if not allow_rejected and rejected_reason: + continue + + d = json.loads(row["json"]) + internal_metadata = json.loads(row["internal_metadata"]) + + format_version = row["format_version"] + if format_version is None: + # This means that we stored the event before we had the concept + # of a event format version, so it must be a V1 event. + format_version = EventFormatVersions.V1 + + original_ev = event_type_from_format_version(format_version)( + event_dict=d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + event_map[event_id] = original_ev + + # finally, we can decide whether each one nededs redacting, and build + # the cache entries. + result_map = {} + for event_id, original_ev in event_map.items(): + redactions = fetched_events[event_id]["redactions"] + redacted_event = self._maybe_redact_event_row( + original_ev, redactions, event_map + ) + + cache_entry = _EventCacheEntry( + event=original_ev, redacted_event=redacted_event + ) + + self._get_event_cache.prefill((event_id,), cache_entry) + result_map[event_id] = cache_entry + + return result_map + + @defer.inlineCallbacks + def _enqueue_events(self, events): """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. + + Args: + events (Iterable[str]): events to be fetched. + + Returns: + Deferred[Dict[str, Dict]]: map from event id to row data from the database. + May contain events that weren't requested. """ - if not events: - return {} events_d = defer.Deferred() with self._event_fetch_lock: @@ -482,32 +561,12 @@ class EventsWorkerStore(SQLBaseStore): "fetch_events", self.runWithConnection, self._do_fetch ) - logger.debug("Loading %d events", len(events)) + logger.debug("Loading %d events: %s", len(events), events) with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if r["rejected_reason"] is None] - - res = yield make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self._get_event_from_row, - row["internal_metadata"], - row["json"], - row["redactions"], - rejected_reason=row["rejected_reason"], - format_version=row["format_version"], - ) - for row in rows - ], - consumeErrors=True, - ) - ) + row_map = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(row_map)) - return {e.event.event_id: e for e in res if e} + return row_map def _fetch_event_rows(self, txn, event_ids): """Fetch event rows from the database @@ -580,50 +639,7 @@ class EventsWorkerStore(SQLBaseStore): return event_dict - @defer.inlineCallbacks - def _get_event_from_row( - self, internal_metadata, js, redactions, format_version, rejected_reason=None - ): - """Parse an event row which has been read from the database - - Args: - internal_metadata (str): json-encoded internal_metadata column - js (str): json-encoded event body from event_json - redactions (list[str]): a list of the events which claim to have redacted - this event, from the redactions table - format_version: (str): the 'format_version' column - rejected_reason (str|None): the reason this event was rejected, if any - - Returns: - _EventCacheEntry - """ - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if format_version is None: - # This means that we stored the event before we had the concept - # of a event format version, so it must be a V1 event. - format_version = EventFormatVersions.V1 - - original_ev = event_type_from_format_version(format_version)( - event_dict=d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = yield self._maybe_redact_event_row(original_ev, redactions) - - cache_entry = _EventCacheEntry( - event=original_ev, redacted_event=redacted_event - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - return cache_entry - - @defer.inlineCallbacks - def _maybe_redact_event_row(self, original_ev, redactions): + def _maybe_redact_event_row(self, original_ev, redactions, event_map): """Given an event object and a list of possible redacting event ids, determine whether to honour any of those redactions and if so return a redacted event. @@ -631,6 +647,8 @@ class EventsWorkerStore(SQLBaseStore): Args: original_ev (EventBase): redactions (iterable[str]): list of event ids of potential redaction events + event_map (dict[str, EventBase]): other events which have been fetched, in + which we can look up the redaaction events. Map from event id to event. Returns: Deferred[EventBase|None]: if the event should be redacted, a pruned @@ -640,15 +658,9 @@ class EventsWorkerStore(SQLBaseStore): # we choose to ignore redactions of m.room.create events. return None - if original_ev.type == "m.room.redaction": - # ... and redaction events - return None - - redaction_map = yield self._get_events_from_cache_or_db(redactions) - for redaction_id in redactions: - redaction_entry = redaction_map.get(redaction_id) - if not redaction_entry: + redaction_event = event_map.get(redaction_id) + if not redaction_event or redaction_event.rejected_reason: # we don't have the redaction event, or the redaction event was not # authorized. logger.debug( @@ -658,7 +670,6 @@ class EventsWorkerStore(SQLBaseStore): ) continue - redaction_event = redaction_entry.event if redaction_event.room_id != original_ev.room_id: logger.debug( "%s was redacted by %s but redaction was in a different room!", diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d20eacda59..e96eed8a6d 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -238,6 +238,13 @@ def _upgrade_existing_database( logger.debug("applied_delta_files: %s", applied_delta_files) + if isinstance(database_engine, PostgresEngine): + specific_engine_extension = ".postgres" + else: + specific_engine_extension = ".sqlite" + + specific_engine_extensions = (".sqlite", ".postgres") + for v in range(start_ver, SCHEMA_VERSION + 1): logger.info("Upgrading schema to v%d", v) @@ -274,15 +281,22 @@ def _upgrade_existing_database( # Sometimes .pyc files turn up anyway even though we've # disabled their generation; e.g. from distribution package # installers. Silently skip it - pass + continue elif ext == ".sql": # A plain old .sql file, just read and execute it logger.info("Applying schema %s", relative_path) executescript(cur, absolute_path) + elif ext == specific_engine_extension and root_name.endswith(".sql"): + # A .sql file specific to our engine; just read and execute it + logger.info("Applying engine-specific schema %s", relative_path) + executescript(cur, absolute_path) + elif ext in specific_engine_extensions and root_name.endswith(".sql"): + # A .sql file for a different engine; skip it. + continue else: # Not a valid delta file. - logger.warn( - "Found directory entry that did not end in .py or" " .sql: %s", + logger.warning( + "Found directory entry that did not end in .py or .sql: %s", relative_path, ) continue @@ -290,7 +304,7 @@ def _upgrade_existing_database( # Mark as done. cur.execute( database_engine.convert_param_style( - "INSERT INTO applied_schema_deltas (version, file)" " VALUES (?,?)" + "INSERT INTO applied_schema_deltas (version, file) VALUES (?,?)" ), (v, relative_path), ) @@ -298,7 +312,7 @@ def _upgrade_existing_database( cur.execute("DELETE FROM schema_version") cur.execute( database_engine.convert_param_style( - "INSERT INTO schema_version (version, upgraded)" " VALUES (?,?)" + "INSERT INTO schema_version (version, upgraded) VALUES (?,?)" ), (v, True), ) diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index be3d4d9ded..b431d24b8a 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -308,22 +308,36 @@ class PusherStore(PusherWorkerStore): def update_pusher_last_stream_ordering_and_success( self, app_id, pushkey, user_id, last_stream_ordering, last_success ): - yield self._simple_update_one( - "pushers", - {"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, - { + """Update the last stream ordering position we've processed up to for + the given pusher. + + Args: + app_id (str) + pushkey (str) + last_stream_ordering (int) + last_success (int) + + Returns: + Deferred[bool]: True if the pusher still exists; False if it has been deleted. + """ + updated = yield self._simple_update( + table="pushers", + keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, + updatevalues={ "last_stream_ordering": last_stream_ordering, "last_success": last_success, }, desc="update_pusher_last_stream_ordering_and_success", ) + return bool(updated) + @defer.inlineCallbacks def update_pusher_failing_since(self, app_id, pushkey, user_id, failing_since): - yield self._simple_update_one( - "pushers", - {"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, - {"failing_since": failing_since}, + yield self._simple_update( + table="pushers", + keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, + updatevalues={"failing_since": failing_since}, desc="update_pusher_failing_since", ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71..3f50324253 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -56,6 +56,7 @@ class RegistrationWorkerStore(SQLBaseStore): "consent_server_notice_sent", "appservice_id", "creation_ts", + "user_type", ], allow_none=True, desc="get_user_by_id", @@ -272,6 +273,14 @@ class RegistrationWorkerStore(SQLBaseStore): @defer.inlineCallbacks def is_server_admin(self, user): + """Determines if a user is an admin of this homeserver. + + Args: + user (UserID): user ID of the user to test + + Returns (bool): + true iff the user is a server admin, false otherwise. + """ res = yield self._simple_select_one_onecol( table="users", keyvalues={"name": user.to_string()}, @@ -282,6 +291,21 @@ class RegistrationWorkerStore(SQLBaseStore): return res if res else False + def set_server_admin(self, user, admin): + """Sets whether a user is an admin of this homeserver. + + Args: + user (UserID): user ID of the user to test + admin (bool): true iff the user is to be a server admin, + false otherwise. + """ + return self._simple_update_one( + table="users", + keyvalues={"name": user.to_string()}, + updatevalues={"admin": 1 if admin else 0}, + desc="set_server_admin", + ) + def _query_for_auth(self, txn, token): sql = ( "SELECT users.name, users.is_guest, access_tokens.id as token_id," diff --git a/synapse/storage/room.py b/synapse/storage/room.py index bc606292b8..08e13f3a3b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -386,32 +386,12 @@ class RoomStore(RoomWorkerStore, SearchStore): def _store_room_topic_txn(self, txn, event): if hasattr(event, "content") and "topic" in event.content: - self._simple_insert_txn( - txn, - "topics", - { - "event_id": event.event_id, - "room_id": event.room_id, - "topic": event.content["topic"], - }, - ) - self.store_event_search_txn( txn, event, "content.topic", event.content["topic"] ) def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: - self._simple_insert_txn( - txn, - "room_names", - { - "event_id": event.event_id, - "room_id": event.room_id, - "name": event.content["name"], - }, - ) - self.store_event_search_txn( txn, event, "content.name", event.content["name"] ) @@ -422,21 +402,6 @@ class RoomStore(RoomWorkerStore, SearchStore): txn, event, "content.body", event.content["body"] ) - def _store_history_visibility_txn(self, txn, event): - self._store_content_index_txn(txn, event, "history_visibility") - - def _store_guest_access_txn(self, txn, event): - self._store_content_index_txn(txn, event, "guest_access") - - def _store_content_index_txn(self, txn, event, key): - if hasattr(event, "content") and key in event.content: - sql = ( - "INSERT INTO %(key)s" - " (event_id, room_id, %(key)s)" - " VALUES (?, ?, ?)" % {"key": key} - ) - txn.execute(sql, (event.event_id, event.room_id, event.content[key])) - def add_event_report( self, room_id, event_id, user_id, reason, content, received_ts ): diff --git a/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql new file mode 100644 index 0000000000..41807eb1e7 --- /dev/null +++ b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Opentracing context data for inclusion in the device_list_update EDUs, as a + * json-encoded dictionary. NULL if opentracing is disabled (or not enabled for this destination). + */ +ALTER TABLE device_lists_outbound_pokes ADD opentracing_context TEXT; diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql new file mode 100644 index 0000000000..9f09922c67 --- /dev/null +++ b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- these tables are never used. +DROP TABLE IF EXISTS room_names; +DROP TABLE IF EXISTS topics; +DROP TABLE IF EXISTS history_visibility; +DROP TABLE IF EXISTS guest_access; diff --git a/synapse/storage/schema/delta/56/fix_room_keys_index.sql b/synapse/storage/schema/delta/56/fix_room_keys_index.sql new file mode 100644 index 0000000000..014cb3b538 --- /dev/null +++ b/synapse/storage/schema/delta/56/fix_room_keys_index.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- version is supposed to be part of the room keys index +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id); +DROP INDEX IF EXISTS e2e_room_keys_idx; diff --git a/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql b/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql new file mode 100644 index 0000000000..149f8be8b6 --- /dev/null +++ b/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- this was apparently forgotten when the table was created back in delta 53. +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id); |