From 50122754c8743df5c904e81b634fdfdeea64e795 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 21 Jul 2022 08:01:52 -0400 Subject: Add missing types to opentracing. (#13345) After this change `synapse.logging` is fully typed. --- synapse/federation/transport/server/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/federation') diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 84100a5a52..bb0f8d6b7b 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -309,7 +309,7 @@ class BaseFederationServlet: raise # update the active opentracing span with the authenticated entity - set_tag("authenticated_entity", origin) + set_tag("authenticated_entity", str(origin)) # if the origin is authenticated and whitelisted, use its span context # as the parent. -- cgit 1.4.1 From 4f3082d6bf85335d10775a2e869420592189c6b2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 27 Jul 2022 04:40:04 -0500 Subject: Fix `get_pdu` asking every remote destination even after it finds an event (#13346) --- changelog.d/13346.misc | 1 + synapse/federation/federation_client.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13346.misc (limited to 'synapse/federation') diff --git a/changelog.d/13346.misc b/changelog.d/13346.misc new file mode 100644 index 0000000000..06557c8481 --- /dev/null +++ b/changelog.d/13346.misc @@ -0,0 +1 @@ +Fix long-standing bugged logic which was never hit in `get_pdu` asking every remote destination even after it finds an event. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 842f5327c2..02276ed995 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -403,9 +403,9 @@ class FederationClient(FederationBase): # Prime the cache self._get_pdu_cache[event.event_id] = event - # FIXME: We should add a `break` here to avoid calling every - # destination after we already found a PDU (will follow-up - # in a separate PR) + # Now that we have an event, we can break out of this + # loop and stop asking other destinations. + break except SynapseError as e: logger.info( -- cgit 1.4.1 From 39be5bc550f2e882b4754c7d98c906d9bde8b649 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 27 Jul 2022 10:37:50 +0000 Subject: Make minor clarifications to the error messages given when we fail to join a room via any server. (#13160) --- changelog.d/13160.misc | 1 + synapse/federation/federation_client.py | 8 +++++++- synapse/handlers/room_member.py | 6 +++++- tests/rest/admin/test_room.py | 5 ++++- 4 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13160.misc (limited to 'synapse/federation') diff --git a/changelog.d/13160.misc b/changelog.d/13160.misc new file mode 100644 index 0000000000..36ff50c2a6 --- /dev/null +++ b/changelog.d/13160.misc @@ -0,0 +1 @@ +Make minor clarifications to the error messages given when we fail to join a room via any server. \ No newline at end of file diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 02276ed995..6a8d76529b 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -725,6 +725,12 @@ class FederationClient(FederationBase): if failover_errcodes is None: failover_errcodes = () + if not destinations: + # Give a bit of a clearer message if no servers were specified at all. + raise SynapseError( + 502, f"Failed to {description} via any server: No servers specified." + ) + for destination in destinations: if destination == self.server_name: continue @@ -774,7 +780,7 @@ class FederationClient(FederationBase): "Failed to %s via %s", description, destination, exc_info=True ) - raise SynapseError(502, "Failed to %s via any server" % (description,)) + raise SynapseError(502, f"Failed to {description} via any server") async def make_membership_event( self, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 30b4cb23df..520c52e013 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1679,7 +1679,11 @@ class RoomMemberMasterHandler(RoomMemberHandler): ] if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") + raise SynapseError( + 404, + "Can't join remote room because no servers " + "that are in the room have been provided.", + ) check_complexity = self.hs.config.server.limit_remote_rooms.enabled if ( diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 2526136ff8..623883b53c 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1873,7 +1873,10 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase): ) self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body) - self.assertEqual("No known servers", channel.json_body["error"]) + self.assertEqual( + "Can't join remote room because no servers that are in the room have been provided.", + channel.json_body["error"], + ) def test_room_is_not_valid(self) -> None: """ -- cgit 1.4.1 From 502f075e96b458a183952ae2be402f00b28af299 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Wed, 27 Jul 2022 13:44:40 +0100 Subject: Implement MSC3848: Introduce errcodes for specific event sending failures (#13343) Implements MSC3848 --- changelog.d/13343.feature | 1 + synapse/api/auth.py | 11 +++-- synapse/api/errors.py | 58 ++++++++++++++++++++++----- synapse/config/experimental.py | 3 ++ synapse/event_auth.py | 62 ++++++++++++++++++++++++----- synapse/federation/federation_server.py | 2 +- synapse/handlers/auth.py | 2 +- synapse/handlers/message.py | 13 +++++- synapse/handlers/room_summary.py | 5 ++- synapse/http/server.py | 18 +++++++-- tests/rest/client/test_third_party_rules.py | 5 ++- 11 files changed, 144 insertions(+), 36 deletions(-) create mode 100644 changelog.d/13343.feature (limited to 'synapse/federation') diff --git a/changelog.d/13343.feature b/changelog.d/13343.feature new file mode 100644 index 0000000000..c151251e54 --- /dev/null +++ b/changelog.d/13343.feature @@ -0,0 +1 @@ +Add new unstable error codes `ORG.MATRIX.MSC3848.ALREADY_JOINED`, `ORG.MATRIX.MSC3848.NOT_JOINED`, and `ORG.MATRIX.MSC3848.INSUFFICIENT_POWER` described in MSC3848. \ No newline at end of file diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 6e6eaf3805..82e6475ef5 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -26,6 +26,7 @@ from synapse.api.errors import ( Codes, InvalidClientTokenError, MissingClientTokenError, + UnstableSpecAuthError, ) from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent @@ -106,8 +107,11 @@ class Auth: forgot = await self.store.did_forget(user_id, room_id) if not forgot: return membership, member_event_id - - raise AuthError(403, "User %s not in room %s" % (user_id, room_id)) + raise UnstableSpecAuthError( + 403, + "User %s not in room %s" % (user_id, room_id), + errcode=Codes.NOT_JOINED, + ) async def get_user_by_req( self, @@ -600,8 +604,9 @@ class Auth: == HistoryVisibility.WORLD_READABLE ): return Membership.JOIN, None - raise AuthError( + raise UnstableSpecAuthError( 403, "User %s not in room %s, and room previews are disabled" % (user_id, room_id), + errcode=Codes.NOT_JOINED, ) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 1c74e131f2..e6dea89c6d 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -26,6 +26,7 @@ from twisted.web import http from synapse.util import json_decoder if typing.TYPE_CHECKING: + from synapse.config.homeserver import HomeServerConfig from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -80,6 +81,12 @@ class Codes(str, Enum): INVALID_SIGNATURE = "M_INVALID_SIGNATURE" USER_DEACTIVATED = "M_USER_DEACTIVATED" + # Part of MSC3848 + # https://github.com/matrix-org/matrix-spec-proposals/pull/3848 + ALREADY_JOINED = "ORG.MATRIX.MSC3848.ALREADY_JOINED" + NOT_JOINED = "ORG.MATRIX.MSC3848.NOT_JOINED" + INSUFFICIENT_POWER = "ORG.MATRIX.MSC3848.INSUFFICIENT_POWER" + # The account has been suspended on the server. # By opposition to `USER_DEACTIVATED`, this is a reversible measure # that can possibly be appealed and reverted. @@ -167,7 +174,7 @@ class SynapseError(CodeMessageException): else: self._additional_fields = dict(additional_fields) - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, **self._additional_fields) @@ -213,7 +220,7 @@ class ConsentNotGivenError(SynapseError): ) self._consent_uri = consent_uri - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, consent_uri=self._consent_uri) @@ -307,6 +314,37 @@ class AuthError(SynapseError): super().__init__(code, msg, errcode, additional_fields) +class UnstableSpecAuthError(AuthError): + """An error raised when a new error code is being proposed to replace a previous one. + This error will return a "org.matrix.unstable.errcode" property with the new error code, + with the previous error code still being defined in the "errcode" property. + + This error will include `org.matrix.msc3848.unstable.errcode` in the C-S error body. + """ + + def __init__( + self, + code: int, + msg: str, + errcode: str, + previous_errcode: str = Codes.FORBIDDEN, + additional_fields: Optional[dict] = None, + ): + self.previous_errcode = previous_errcode + super().__init__(code, msg, errcode, additional_fields) + + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": + fields = {} + if config is not None and config.experimental.msc3848_enabled: + fields["org.matrix.msc3848.unstable.errcode"] = self.errcode + return cs_error( + self.msg, + self.previous_errcode, + **fields, + **self._additional_fields, + ) + + class InvalidClientCredentialsError(SynapseError): """An error raised when there was a problem with the authorisation credentials in a client request. @@ -338,8 +376,8 @@ class InvalidClientTokenError(InvalidClientCredentialsError): super().__init__(msg=msg, errcode="M_UNKNOWN_TOKEN") self._soft_logout = soft_logout - def error_dict(self) -> "JsonDict": - d = super().error_dict() + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": + d = super().error_dict(config) d["soft_logout"] = self._soft_logout return d @@ -362,7 +400,7 @@ class ResourceLimitError(SynapseError): self.limit_type = limit_type super().__init__(code, msg, errcode=errcode) - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error( self.msg, self.errcode, @@ -397,7 +435,7 @@ class InvalidCaptchaError(SynapseError): super().__init__(code, msg, errcode) self.error_url = error_url - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, error_url=self.error_url) @@ -414,7 +452,7 @@ class LimitExceededError(SynapseError): super().__init__(code, msg, errcode) self.retry_after_ms = retry_after_ms - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, retry_after_ms=self.retry_after_ms) @@ -429,7 +467,7 @@ class RoomKeysVersionError(SynapseError): super().__init__(403, "Wrong room_keys version", Codes.WRONG_ROOM_KEYS_VERSION) self.current_version = current_version - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, current_version=self.current_version) @@ -469,7 +507,7 @@ class IncompatibleRoomVersionError(SynapseError): self._room_version = room_version - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, room_version=self._room_version) @@ -515,7 +553,7 @@ class UnredactedContentDeletedError(SynapseError): ) self.content_keep_ms = content_keep_ms - def error_dict(self) -> "JsonDict": + def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": extra = {} if self.content_keep_ms is not None: extra = {"fi.mau.msc2815.content_keep_ms": self.content_keep_ms} diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index ee443cea00..1902222d7b 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -90,3 +90,6 @@ class ExperimentalConfig(Config): # MSC3827: Filtering of /publicRooms by room type self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) + + # MSC3848: Introduce errcodes for specific event sending failures + self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False) diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 965cb265da..389b0c5d53 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -30,7 +30,13 @@ from synapse.api.constants import ( JoinRules, Membership, ) -from synapse.api.errors import AuthError, EventSizeError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + EventSizeError, + SynapseError, + UnstableSpecAuthError, +) from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, @@ -291,7 +297,11 @@ def check_state_dependent_auth_rules( invite_level = get_named_level(auth_dict, "invite", 0) if user_level < invite_level: - raise AuthError(403, "You don't have permission to invite users") + raise UnstableSpecAuthError( + 403, + "You don't have permission to invite users", + errcode=Codes.INSUFFICIENT_POWER, + ) else: logger.debug("Allowing! %s", event) return @@ -474,7 +484,11 @@ def _is_membership_change_allowed( return if not caller_in_room: # caller isn't joined - raise AuthError(403, "%s not in room %s." % (event.user_id, event.room_id)) + raise UnstableSpecAuthError( + 403, + "%s not in room %s." % (event.user_id, event.room_id), + errcode=Codes.NOT_JOINED, + ) if Membership.INVITE == membership: # TODO (erikj): We should probably handle this more intelligently @@ -484,10 +498,18 @@ def _is_membership_change_allowed( if target_banned: raise AuthError(403, "%s is banned from the room" % (target_user_id,)) elif target_in_room: # the target is already in the room. - raise AuthError(403, "%s is already in the room." % target_user_id) + raise UnstableSpecAuthError( + 403, + "%s is already in the room." % target_user_id, + errcode=Codes.ALREADY_JOINED, + ) else: if user_level < invite_level: - raise AuthError(403, "You don't have permission to invite users") + raise UnstableSpecAuthError( + 403, + "You don't have permission to invite users", + errcode=Codes.INSUFFICIENT_POWER, + ) elif Membership.JOIN == membership: # Joins are valid iff caller == target and: # * They are not banned. @@ -549,15 +571,27 @@ def _is_membership_change_allowed( elif Membership.LEAVE == membership: # TODO (erikj): Implement kicks. if target_banned and user_level < ban_level: - raise AuthError(403, "You cannot unban user %s." % (target_user_id,)) + raise UnstableSpecAuthError( + 403, + "You cannot unban user %s." % (target_user_id,), + errcode=Codes.INSUFFICIENT_POWER, + ) elif target_user_id != event.user_id: kick_level = get_named_level(auth_events, "kick", 50) if user_level < kick_level or user_level <= target_level: - raise AuthError(403, "You cannot kick user %s." % target_user_id) + raise UnstableSpecAuthError( + 403, + "You cannot kick user %s." % target_user_id, + errcode=Codes.INSUFFICIENT_POWER, + ) elif Membership.BAN == membership: if user_level < ban_level or user_level <= target_level: - raise AuthError(403, "You don't have permission to ban") + raise UnstableSpecAuthError( + 403, + "You don't have permission to ban", + errcode=Codes.INSUFFICIENT_POWER, + ) elif room_version.msc2403_knocking and Membership.KNOCK == membership: if join_rule != JoinRules.KNOCK and ( not room_version.msc3787_knock_restricted_join_rule @@ -567,7 +601,11 @@ def _is_membership_change_allowed( elif target_user_id != event.user_id: raise AuthError(403, "You cannot knock for other users") elif target_in_room: - raise AuthError(403, "You cannot knock on a room you are already in") + raise UnstableSpecAuthError( + 403, + "You cannot knock on a room you are already in", + errcode=Codes.ALREADY_JOINED, + ) elif caller_invited: raise AuthError(403, "You are already invited to this room") elif target_banned: @@ -638,10 +676,11 @@ def _can_send_event(event: "EventBase", auth_events: StateMap["EventBase"]) -> b user_level = get_user_power_level(event.user_id, auth_events) if user_level < send_level: - raise AuthError( + raise UnstableSpecAuthError( 403, "You don't have permission to post that to the room. " + "user_level (%d) < send_level (%d)" % (user_level, send_level), + errcode=Codes.INSUFFICIENT_POWER, ) # Check state_key @@ -716,9 +755,10 @@ def check_historical( historical_level = get_named_level(auth_events, "historical", 100) if user_level < historical_level: - raise AuthError( + raise UnstableSpecAuthError( 403, 'You don\'t have permission to send send historical related events ("insertion", "batch", and "marker")', + errcode=Codes.INSUFFICIENT_POWER, ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index ae550d3f4d..1d60137411 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -469,7 +469,7 @@ class FederationServer(FederationBase): ) for pdu in pdus_by_room[room_id]: event_id = pdu.event_id - pdu_results[event_id] = e.error_dict() + pdu_results[event_id] = e.error_dict(self.hs.config) return for pdu in pdus_by_room[room_id]: diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 3d83236b0c..bfa5535044 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -565,7 +565,7 @@ class AuthHandler: except LoginError as e: # this step failed. Merge the error dict into the response # so that the client can have another go. - errordict = e.error_dict() + errordict = e.error_dict(self.hs.config) creds = await self.store.get_completed_ui_auth_stages(session.session_id) for f in flows: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e0bcc40b93..e85b540451 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -41,6 +41,7 @@ from synapse.api.errors import ( NotFoundError, ShadowBanError, SynapseError, + UnstableSpecAuthError, UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS @@ -149,7 +150,11 @@ class MessageHandler: "Attempted to retrieve data from a room for a user that has never been in it. " "This should not have happened." ) - raise SynapseError(403, "User not in room", errcode=Codes.FORBIDDEN) + raise UnstableSpecAuthError( + 403, + "User not in room", + errcode=Codes.NOT_JOINED, + ) return data @@ -334,7 +339,11 @@ class MessageHandler: break else: # Loop fell through, AS has no interested users in room - raise AuthError(403, "Appservice not in room") + raise UnstableSpecAuthError( + 403, + "Appservice not in room", + errcode=Codes.NOT_JOINED, + ) return { user_id: { diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 85811b5bde..ebd445adca 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -28,11 +28,11 @@ from synapse.api.constants import ( RoomTypes, ) from synapse.api.errors import ( - AuthError, Codes, NotFoundError, StoreError, SynapseError, + UnstableSpecAuthError, UnsupportedRoomVersionError, ) from synapse.api.ratelimiting import Ratelimiter @@ -175,10 +175,11 @@ class RoomSummaryHandler: # First of all, check that the room is accessible. if not await self._is_local_room_accessible(requested_room_id, requester): - raise AuthError( + raise UnstableSpecAuthError( 403, "User %s not in room %s, and room previews are disabled" % (requester, requested_room_id), + errcode=Codes.NOT_JOINED, ) # If this is continuing a previous session, pull the persisted data. diff --git a/synapse/http/server.py b/synapse/http/server.py index cf2d6f904b..19f42159b8 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -58,6 +58,7 @@ from synapse.api.errors import ( SynapseError, UnrecognizedRequestError, ) +from synapse.config.homeserver import HomeServerConfig from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background from synapse.logging.opentracing import active_span, start_active_span, trace_servlet @@ -155,15 +156,16 @@ def is_method_cancellable(method: Callable[..., Any]) -> bool: return getattr(method, "cancellable", False) -def return_json_error(f: failure.Failure, request: SynapseRequest) -> None: +def return_json_error( + f: failure.Failure, request: SynapseRequest, config: Optional[HomeServerConfig] +) -> None: """Sends a JSON error response to clients.""" if f.check(SynapseError): # mypy doesn't understand that f.check asserts the type. exc: SynapseError = f.value # type: ignore error_code = exc.code - error_dict = exc.error_dict() - + error_dict = exc.error_dict(config) logger.info("%s SynapseError: %s - %s", request, error_code, exc.msg) elif f.check(CancelledError): error_code = HTTP_STATUS_REQUEST_CANCELLED @@ -450,7 +452,7 @@ class DirectServeJsonResource(_AsyncResource): request: SynapseRequest, ) -> None: """Implements _AsyncResource._send_error_response""" - return_json_error(f, request) + return_json_error(f, request, None) @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -575,6 +577,14 @@ class JsonResource(DirectServeJsonResource): return callback_return + def _send_error_response( + self, + f: failure.Failure, + request: SynapseRequest, + ) -> None: + """Implements _AsyncResource._send_error_response""" + return_json_error(f, request, self.hs.config) + class DirectServeHtmlResource(_AsyncResource): """A resource that will call `self._async_on_` on new requests, diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 9a48e9286f..18a7195409 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -20,6 +20,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes, LoginType, Membership from synapse.api.errors import SynapseError from synapse.api.room_versions import RoomVersion +from synapse.config.homeserver import HomeServerConfig from synapse.events import EventBase from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.rest import admin @@ -185,12 +186,12 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): """ class NastyHackException(SynapseError): - def error_dict(self) -> JsonDict: + def error_dict(self, config: Optional[HomeServerConfig]) -> JsonDict: """ This overrides SynapseError's `error_dict` to nastily inject JSON into the error response. """ - result = super().error_dict() + result = super().error_dict(config) result["nasty"] = "very" return result -- cgit 1.4.1 From 92d21faf12c982a8d27ad465eb94f2fed0e8b32f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Aug 2022 10:57:38 -0500 Subject: Instrument `/messages` for understandable traces in Jaeger (#13368) In Jaeger: - Before: huge list of uncategorized database calls - After: nice and collapsible into units of work --- changelog.d/13368.misc | 1 + synapse/api/auth.py | 8 +++++++- synapse/federation/federation_client.py | 2 ++ synapse/handlers/federation.py | 2 ++ synapse/handlers/federation_event.py | 5 +++++ synapse/handlers/pagination.py | 2 ++ synapse/handlers/relations.py | 2 ++ synapse/storage/controllers/state.py | 5 +++++ synapse/storage/databases/main/stream.py | 2 ++ synapse/streams/events.py | 2 ++ synapse/visibility.py | 2 ++ 11 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13368.misc (limited to 'synapse/federation') diff --git a/changelog.d/13368.misc b/changelog.d/13368.misc new file mode 100644 index 0000000000..4b433a5107 --- /dev/null +++ b/changelog.d/13368.misc @@ -0,0 +1 @@ +Instrument `/messages` for understandable traces in Jaeger. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 82e6475ef5..523bad0c55 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -31,7 +31,12 @@ from synapse.api.errors import ( from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import active_span, force_tracing, start_active_span +from synapse.logging.opentracing import ( + active_span, + force_tracing, + start_active_span, + trace, +) from synapse.storage.databases.main.registration import TokenLookupResult from synapse.types import Requester, UserID, create_requester @@ -567,6 +572,7 @@ class Auth: return query_params[0].decode("ascii") + @trace async def check_user_in_room_or_world_readable( self, room_id: str, user_id: str, allow_departed_users: bool = False ) -> Tuple[str, Optional[str]]: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6a8d76529b..54ffbd8170 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,6 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams +from synapse.logging.opentracing import trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -233,6 +234,7 @@ class FederationClient(FederationBase): destination, content, timeout ) + @trace async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 57ad6e5dce..30f1585a85 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,6 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -180,6 +181,7 @@ class FederationHandler: "resume_sync_partial_state_room", self._resume_sync_partial_state_room ) + @trace async def maybe_backfill( self, room_id: str, current_depth: int, limit: int ) -> bool: diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 91d1439191..8968b705d4 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -59,6 +59,7 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError from synapse.logging.context import nested_logging_context +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -566,6 +567,7 @@ class FederationEventHandler: event.event_id ) + @trace async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> None: @@ -610,6 +612,7 @@ class FederationEventHandler: backfilled=True, ) + @trace async def _get_missing_events_for_pdu( self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int ) -> None: @@ -710,6 +713,7 @@ class FederationEventHandler: logger.info("Got %d prev_events", len(missing_events)) await self._process_pulled_events(origin, missing_events, backfilled=False) + @trace async def _process_pulled_events( self, origin: str, events: Iterable[EventBase], backfilled: bool ) -> None: @@ -748,6 +752,7 @@ class FederationEventHandler: with nested_logging_context(ev.event_id): await self._process_pulled_event(origin, ev, backfilled=backfilled) + @trace async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 6262a35822..e1e34e3b16 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -24,6 +24,7 @@ from synapse.api.errors import SynapseError from synapse.api.filtering import Filter from synapse.events.utils import SerializeEventConfig from synapse.handlers.room import ShutdownRoomResponse +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig @@ -416,6 +417,7 @@ class PaginationHandler: await self._storage_controllers.purge_events.purge_room(room_id) + @trace async def get_messages( self, requester: Requester, diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 8f797e3ae9..72d25df8c8 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -19,6 +19,7 @@ import attr from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.events import EventBase, relation_from_event +from synapse.logging.opentracing import trace from synapse.storage.databases.main.relations import _RelatedEvent from synapse.types import JsonDict, Requester, StreamToken, UserID from synapse.visibility import filter_events_for_client @@ -361,6 +362,7 @@ class RelationsHandler: return results + @trace async def get_bundled_aggregations( self, events: Iterable[EventBase], user_id: str ) -> Dict[str, BundledAggregations]: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 1e35046e07..0d480f1014 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -29,6 +29,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase +from synapse.logging.opentracing import trace from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, @@ -179,6 +180,7 @@ class StateStorageController: return self.stores.state._get_state_groups_from_groups(groups, state_filter) + @trace async def get_state_for_events( self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None ) -> Dict[str, StateMap[EventBase]]: @@ -225,6 +227,7 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} + @trace async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -287,6 +290,7 @@ class StateStorageController: ) return state_map[event_id] + @trace async def get_state_ids_for_event( self, event_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[str]: @@ -327,6 +331,7 @@ class StateStorageController: groups, state_filter or StateFilter.all() ) + @trace async def get_state_group_for_events( self, event_ids: Collection[str], diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2590b52f73..a347430aa7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -58,6 +58,7 @@ from twisted.internet import defer from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.opentracing import trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -1346,6 +1347,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return rows, next_token + @trace async def paginate_room_events( self, room_id: str, diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 54e0b1a23b..bcd840bd88 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -21,6 +21,7 @@ from synapse.handlers.presence import PresenceEventSource from synapse.handlers.receipts import ReceiptEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource +from synapse.logging.opentracing import trace from synapse.streams import EventSource from synapse.types import StreamToken @@ -69,6 +70,7 @@ class EventSources: ) return token + @trace async def get_current_token_for_pagination(self, room_id: str) -> StreamToken: """Get the current token for a given room to be used to paginate events. diff --git a/synapse/visibility.py b/synapse/visibility.py index 9abbaa5a64..d947edde66 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event +from synapse.logging.opentracing import trace from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter @@ -51,6 +52,7 @@ MEMBERSHIP_PRIORITY = ( _HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "") +@trace async def filter_events_for_client( storage: StorageControllers, user_id: str, -- cgit 1.4.1 From e9e6aacfbe016922ed6a25071c0e9ce77334b4e8 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 4 Aug 2022 15:27:04 +0000 Subject: Faster Room Joins: prevent Synapse from answering federated join requests for a room which it has not fully joined yet. (#13416) --- changelog.d/13416.misc | 1 + synapse/federation/federation_server.py | 17 +++++++++++++++++ synapse/handlers/federation.py | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 changelog.d/13416.misc (limited to 'synapse/federation') diff --git a/changelog.d/13416.misc b/changelog.d/13416.misc new file mode 100644 index 0000000000..2904e73376 --- /dev/null +++ b/changelog.d/13416.misc @@ -0,0 +1 @@ +Faster Room Joins: prevent Synapse from answering federated join requests for a room which it has not fully joined yet. \ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 1d60137411..db4b83a505 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -843,8 +843,25 @@ class FederationServer(FederationBase): Codes.BAD_JSON, ) + # Note that get_room_version throws if the room does not exist here. room_version = await self.store.get_room_version(room_id) + if await self.store.is_partial_state_room(room_id): + # If our server is still only partially joined, we can't give a complete + # response to /send_join, /send_knock or /send_leave. + # This is because we will not be able to provide the server list (for partial + # joins) or the full state (for full joins). + # Return a 404 as we would if we weren't in the room at all. + logger.info( + f"Rejecting /send_{membership_type} to %s because it's a partial state room", + room_id, + ) + raise SynapseError( + 404, + f"Unable to handle /send_{membership_type} right now; this server is not fully joined.", + errcode=Codes.NOT_FOUND, + ) + if membership_type == Membership.KNOCK and not room_version.msc2403_knocking: raise SynapseError( 403, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 30f1585a85..5042236742 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -754,6 +754,23 @@ class FederationHandler: # (and return a 404 otherwise) room_version = await self.store.get_room_version(room_id) + if await self.store.is_partial_state_room(room_id): + # If our server is still only partially joined, we can't give a complete + # response to /make_join, so return a 404 as we would if we weren't in the + # room at all. + # The main reason we can't respond properly is that we need to know about + # the auth events for the join event that we would return. + # We also should not bother entertaining the /make_join since we cannot + # handle the /send_join. + logger.info( + "Rejecting /make_join to %s because it's a partial state room", room_id + ) + raise SynapseError( + 404, + "Unable to handle /make_join right now; this server is not fully joined.", + errcode=Codes.NOT_FOUND, + ) + # now check that we are *still* in the room is_in_room = await self._event_auth_handler.check_host_in_room( room_id, self.server_name -- cgit 1.4.1 From 344a2f767c636259412f7fc2914c1554a5c4dc1d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Aug 2022 13:41:23 -0500 Subject: Instrument `FederationStateIdsServlet` - `/state_ids` (#13499) Instrument FederationStateIdsServlet - `/state_ids` so it's easier to follow what's going on in Jaeger when viewing a trace. --- changelog.d/13499.misc | 1 + synapse/federation/federation_server.py | 11 ++++++++++- synapse/handlers/federation.py | 4 +++- synapse/storage/databases/main/event_federation.py | 3 +++ synapse/util/ratelimitutils.py | 4 ++++ 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13499.misc (limited to 'synapse/federation') diff --git a/changelog.d/13499.misc b/changelog.d/13499.misc new file mode 100644 index 0000000000..99dbcebec8 --- /dev/null +++ b/changelog.d/13499.misc @@ -0,0 +1 @@ +Instrument `FederationStateIdsServlet` (`/state_ids`) for understandable traces in Jaeger. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index db4b83a505..75fbc6073d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -61,7 +61,12 @@ from synapse.logging.context import ( nested_logging_context, run_in_background, ) -from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace +from synapse.logging.opentracing import ( + log_kv, + start_active_span_from_edu, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -547,6 +552,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def on_state_ids_request( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: @@ -569,6 +576,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def _on_state_ids_request_compute( self, room_id: str, event_id: str ) -> JsonDict: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5042236742..6f5ab86ac4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -1081,6 +1081,8 @@ class FederationHandler: return event + @trace + @tag_args async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: """Returns the state at the event. i.e. not including said event.""" event = await self.store.get_event(event_id, check_room_id=room_id) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index eec55b6478..0bc8401f2b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,6 +33,7 @@ from synapse.api.constants import MAX_DEPTH, EventTypes from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -126,6 +127,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) return await self.get_events_as_list(event_ids) + @trace + @tag_args async def get_auth_chain_ids( self, room_id: str, diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 6394cc39ac..e1beaec5a3 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -27,6 +27,7 @@ from synapse.logging.context import ( make_deferred_yieldable, run_in_background, ) +from synapse.logging.opentracing import start_active_span from synapse.util import Clock if typing.TYPE_CHECKING: @@ -176,8 +177,11 @@ class _PerHostRatelimiter: # Ensure that we've properly cleaned up. self.sleeping_requests.discard(request_id) self.ready_request_queue.pop(request_id, None) + wait_span_scope.__exit__(None, None, None) return r + wait_span_scope = start_active_span("ratelimit wait") + wait_span_scope.__enter__() ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) return make_deferred_yieldable(ret_defer) -- cgit 1.4.1 From 0a4efbc1ddc3a58a6d75ad5d4d960b9ed367481e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Aug 2022 12:39:40 -0500 Subject: Instrument the federation/backfill part of `/messages` (#13489) Instrument the federation/backfill part of `/messages` so it's easier to follow what's going on in Jaeger when viewing a trace. Split out from https://github.com/matrix-org/synapse/pull/13440 Follow-up from https://github.com/matrix-org/synapse/pull/13368 Part of https://github.com/matrix-org/synapse/issues/13356 --- changelog.d/13489.misc | 1 + synapse/federation/federation_client.py | 27 ++++- synapse/handlers/federation.py | 10 +- synapse/handlers/federation_event.py | 112 ++++++++++++++++++--- synapse/logging/opentracing.py | 19 +++- synapse/storage/controllers/persist_events.py | 30 ++++-- synapse/storage/controllers/state.py | 5 +- synapse/storage/databases/main/event_federation.py | 6 ++ synapse/storage/databases/main/events.py | 2 + synapse/storage/databases/main/events_worker.py | 38 +++++-- .../storage/util/partial_state_events_tracker.py | 3 + 11 files changed, 220 insertions(+), 33 deletions(-) create mode 100644 changelog.d/13489.misc (limited to 'synapse/federation') diff --git a/changelog.d/13489.misc b/changelog.d/13489.misc new file mode 100644 index 0000000000..5e4853860e --- /dev/null +++ b/changelog.d/13489.misc @@ -0,0 +1 @@ +Instrument the federation/backfill part of `/messages` for understandable traces in Jaeger. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 54ffbd8170..987f6dad46 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -235,6 +235,7 @@ class FederationClient(FederationBase): ) @trace + @tag_args async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: @@ -337,6 +338,8 @@ class FederationClient(FederationBase): return None + @trace + @tag_args async def get_pdu( self, destinations: Iterable[str], @@ -448,6 +451,8 @@ class FederationClient(FederationBase): return event_copy + @trace + @tag_args async def get_room_state_ids( self, destination: str, room_id: str, event_id: str ) -> Tuple[List[str], List[str]]: @@ -467,6 +472,23 @@ class FederationClient(FederationBase): state_event_ids = result["pdu_ids"] auth_event_ids = result.get("auth_chain_ids", []) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids", + str(state_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids.length", + str(len(state_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids", + str(auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids.length", + str(len(auth_event_ids)), + ) + if not isinstance(state_event_ids, list) or not isinstance( auth_event_ids, list ): @@ -474,6 +496,8 @@ class FederationClient(FederationBase): return state_event_ids, auth_event_ids + @trace + @tag_args async def get_room_state( self, destination: str, @@ -533,6 +557,7 @@ class FederationClient(FederationBase): return valid_state_events, valid_auth_events + @trace async def _check_sigs_and_hash_and_fetch( self, origin: str, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6f5ab86ac4..d13011d138 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import tag_args, trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -370,6 +370,14 @@ class FederationHandler: logger.debug( "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request", + str(extremities_to_request), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request.length", + str(len(extremities_to_request)), + ) # Now we need to decide which hosts to hit first. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 8968b705d4..dd0d610fe9 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -59,7 +59,13 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import ( + SynapseTags, + set_tag, + start_active_span, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -410,6 +416,7 @@ class FederationEventHandler: prev_member_event, ) + @trace async def process_remote_join( self, origin: str, @@ -715,7 +722,7 @@ class FederationEventHandler: @trace async def _process_pulled_events( - self, origin: str, events: Iterable[EventBase], backfilled: bool + self, origin: str, events: Collection[EventBase], backfilled: bool ) -> None: """Process a batch of events we have pulled from a remote server @@ -730,6 +737,15 @@ class FederationEventHandler: backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) """ + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str([event.event_id for event in events]), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(events)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) logger.debug( "processing pulled backfilled=%s events=%s", backfilled, @@ -753,6 +769,7 @@ class FederationEventHandler: await self._process_pulled_event(origin, ev, backfilled=backfilled) @trace + @tag_args async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: @@ -854,6 +871,7 @@ class FederationEventHandler: else: raise + @trace async def _compute_event_context_with_maybe_missing_prevs( self, dest: str, event: EventBase ) -> EventContext: @@ -970,6 +988,8 @@ class FederationEventHandler: event, state_ids_before_event=state_map, partial_state=partial_state ) + @trace + @tag_args async def _get_state_ids_after_missing_prev_event( self, destination: str, @@ -1009,10 +1029,10 @@ class FederationEventHandler: logger.debug("Fetching %i events from cache/store", len(desired_events)) have_events = await self._store.have_seen_events(room_id, desired_events) - missing_desired_events = desired_events - have_events + missing_desired_event_ids = desired_events - have_events logger.debug( "We are missing %i events (got %i)", - len(missing_desired_events), + len(missing_desired_event_ids), len(have_events), ) @@ -1024,13 +1044,30 @@ class FederationEventHandler: # already have a bunch of the state events. It would be nice if the # federation api gave us a way of finding out which we actually need. - missing_auth_events = set(auth_event_ids) - have_events - missing_auth_events.difference_update( - await self._store.have_seen_events(room_id, missing_auth_events) + missing_auth_event_ids = set(auth_event_ids) - have_events + missing_auth_event_ids.difference_update( + await self._store.have_seen_events(room_id, missing_auth_event_ids) ) - logger.debug("We are also missing %i auth events", len(missing_auth_events)) + logger.debug("We are also missing %i auth events", len(missing_auth_event_ids)) - missing_events = missing_desired_events | missing_auth_events + missing_event_ids = missing_desired_event_ids | missing_auth_event_ids + + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids", + str(missing_auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids.length", + str(len(missing_auth_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids", + str(missing_desired_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids.length", + str(len(missing_desired_event_ids)), + ) # Making an individual request for each of 1000s of events has a lot of # overhead. On the other hand, we don't really want to fetch all of the events @@ -1041,13 +1078,13 @@ class FederationEventHandler: # # TODO: might it be better to have an API which lets us do an aggregate event # request - if (len(missing_events) * 10) >= len(auth_event_ids) + len(state_event_ids): + if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids): logger.debug("Requesting complete state from remote") await self._get_state_and_persist(destination, room_id, event_id) else: - logger.debug("Fetching %i events from remote", len(missing_events)) + logger.debug("Fetching %i events from remote", len(missing_event_ids)) await self._get_events_and_persist( - destination=destination, room_id=room_id, event_ids=missing_events + destination=destination, room_id=room_id, event_ids=missing_event_ids ) # We now need to fill out the state map, which involves fetching the @@ -1104,6 +1141,14 @@ class FederationEventHandler: event_id, failed_to_fetch, ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch", + str(failed_to_fetch), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch.length", + str(len(failed_to_fetch)), + ) if remote_event.is_state() and remote_event.rejected_reason is None: state_map[ @@ -1112,6 +1157,8 @@ class FederationEventHandler: return state_map + @trace + @tag_args async def _get_state_and_persist( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1133,6 +1180,7 @@ class FederationEventHandler: destination=destination, room_id=room_id, event_ids=(event_id,) ) + @trace async def _process_received_pdu( self, origin: str, @@ -1283,6 +1331,7 @@ class FederationEventHandler: except Exception: logger.exception("Failed to resync device for %s", sender) + @trace async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None: """Handles backfilling the insertion event when we receive a marker event that points to one. @@ -1414,6 +1463,8 @@ class FederationEventHandler: return event_from_response + @trace + @tag_args async def _get_events_and_persist( self, destination: str, room_id: str, event_ids: Collection[str] ) -> None: @@ -1459,6 +1510,7 @@ class FederationEventHandler: logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) await self._auth_and_persist_outliers(room_id, events) + @trace async def _auth_and_persist_outliers( self, room_id: str, events: Iterable[EventBase] ) -> None: @@ -1477,6 +1529,16 @@ class FederationEventHandler: """ event_map = {event.event_id: event for event in events} + event_ids = event_map.keys() + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + # filter out any events we have already seen. This might happen because # the events were eagerly pushed to us (eg, during a room join), or because # another thread has raced against us since we decided to request the event. @@ -1593,6 +1655,7 @@ class FederationEventHandler: backfilled=True, ) + @trace async def _check_event_auth( self, origin: Optional[str], event: EventBase, context: EventContext ) -> None: @@ -1631,6 +1694,14 @@ class FederationEventHandler: claimed_auth_events = await self._load_or_fetch_auth_events_for_event( origin, event ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events", + str([ev.event_id for ev in claimed_auth_events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events.length", + str(len(claimed_auth_events)), + ) # ... and check that the event passes auth at those auth events. # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: @@ -1728,6 +1799,7 @@ class FederationEventHandler: ) context.rejected = RejectedReason.AUTH_ERROR + @trace async def _maybe_kick_guest_users(self, event: EventBase) -> None: if event.type != EventTypes.GuestAccess: return @@ -1935,6 +2007,8 @@ class FederationEventHandler: # instead we raise an AuthError, which will make the caller ignore it. raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") + @trace + @tag_args async def _get_remote_auth_chain_for_event( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1963,6 +2037,7 @@ class FederationEventHandler: await self._auth_and_persist_outliers(room_id, remote_auth_events) + @trace async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> None: @@ -2071,8 +2146,17 @@ class FederationEventHandler: self._message_handler.maybe_schedule_expiry(event) if not backfilled: # Never notify for backfilled events - for event in events: - await self._notify_persisted_event(event, max_stream_token) + with start_active_span("notify_persisted_events"): + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids", + str([ev.event_id for ev in events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids.length", + str(len(events)), + ) + for event in events: + await self._notify_persisted_event(event, max_stream_token) return max_stream_token.stream diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d1fa2cf8ae..482316a1ff 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -310,6 +310,19 @@ class SynapseTags: # The name of the external cache CACHE_NAME = "cache.name" + # Used to tag function arguments + # + # Tag a named arg. The name of the argument should be appended to this prefix. + FUNC_ARG_PREFIX = "ARG." + # Tag extra variadic number of positional arguments (`def foo(first, second, *extras)`) + FUNC_ARGS = "args" + # Tag keyword args + FUNC_KWARGS = "kwargs" + + # Some intermediate result that's interesting to the function. The label for + # the result should be appended to this prefix. + RESULT_PREFIX = "RESULT." + class SynapseBaggage: FORCE_TRACING = "synapse-force-tracing" @@ -967,9 +980,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: # first argument only if it's named `self` or `cls`. This isn't fool-proof # but handles the idiomatic cases. for i, arg in enumerate(args[1:], start=1): # type: ignore[index] - set_tag("ARG_" + argspec.args[i], str(arg)) - set_tag("args", str(args[len(argspec.args) :])) # type: ignore[index] - set_tag("kwargs", str(kwargs)) + set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg)) + set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :])) # type: ignore[index] + set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield return _custom_sync_async_decorator(func, _wrapping_logic) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index cf98b0ab48..dad3731b9b 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -45,8 +45,14 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext -from synapse.logging import opentracing from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import ( + SynapseTags, + active_span, + set_tag, + start_active_span_follows_from, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases @@ -223,7 +229,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue.append(end_item) # also add our active opentracing span to the item so that we get a link back - span = opentracing.active_span() + span = active_span() if span: end_item.parent_opentracing_span_contexts.append(span.context) @@ -234,7 +240,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): res = await make_deferred_yieldable(end_item.deferred.observe()) # add another opentracing span which links to the persist trace. - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -266,7 +272,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue = self._get_drainining_queue(room_id) for item in queue: try: - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, @@ -355,7 +361,7 @@ class EventsPersistenceStorageController: f"Found an unexpected task type in event persistence queue: {task}" ) - @opentracing.trace + @trace async def persist_events( self, events_and_contexts: Iterable[Tuple[EventBase, EventContext]], @@ -380,9 +386,21 @@ class EventsPersistenceStorageController: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. """ + event_ids: List[str] = [] partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) + event_ids.append(event.event_id) + + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) async def enqueue( item: Tuple[str, List[Tuple[EventBase, EventContext]]] @@ -418,7 +436,7 @@ class EventsPersistenceStorageController: self.main_store.get_room_max_token(), ) - @opentracing.trace + @trace async def persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 0c78eb735e..1ad002f57b 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -29,7 +29,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( @@ -229,6 +229,7 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} @trace + @tag_args async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -333,6 +334,7 @@ class StateStorageController: ) @trace + @tag_args async def get_state_group_for_events( self, event_ids: Collection[str], @@ -474,6 +476,7 @@ class StateStorageController: prev_stream_id, max_stream_id ) + @trace async def get_current_state( self, room_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[EventBase]: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0bc8401f2b..c836078da6 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -712,6 +712,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Return all events where not all sets can reach them. return {eid for eid, n in event_to_missing_sets.items() if n} + @trace + @tag_args async def get_oldest_event_ids_with_depth_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -770,6 +772,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) + @trace async def get_insertion_event_backward_extremities_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -1342,6 +1345,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas event_results.reverse() return event_results + @trace + @tag_args async def get_successor_events(self, event_id: str) -> List[str]: """Fetch all events that have the given event as a prev event @@ -1378,6 +1383,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas _delete_old_forward_extrem_cache_txn, ) + @trace async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: await self.db_pool.simple_upsert( table="insertion_event_extremities", diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 5560b38a48..a4010ee28d 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -40,6 +40,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext +from synapse.logging.opentracing import trace from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -145,6 +146,7 @@ class PersistEventsStore: self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen + @trace async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b07d812ae2..8a7cdb024d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -54,6 +54,7 @@ from synapse.logging.context import ( current_context, make_deferred_yieldable, ) +from synapse.logging.opentracing import start_active_span, tag_args, trace from synapse.metrics.background_process_metrics import ( run_as_background_process, wrap_as_background_process, @@ -430,6 +431,8 @@ class EventsWorkerStore(SQLBaseStore): return {e.event_id: e for e in events} + @trace + @tag_args async def get_events_as_list( self, event_ids: Collection[str], @@ -1090,23 +1093,42 @@ class EventsWorkerStore(SQLBaseStore): """ fetched_event_ids: Set[str] = set() fetched_events: Dict[str, _EventRow] = {} - events_to_fetch = event_ids - while events_to_fetch: - row_map = await self._enqueue_events(events_to_fetch) + async def _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch: Collection[str], + ) -> Collection[str]: + """ + Fetch all of the given event_ids and return any associated redaction event_ids + that we still need to fetch in the next iteration. + """ + row_map = await self._enqueue_events(event_ids_to_fetch) # we need to recursively fetch any redactions of those events redaction_ids: Set[str] = set() - for event_id in events_to_fetch: + for event_id in event_ids_to_fetch: row = row_map.get(event_id) fetched_event_ids.add(event_id) if row: fetched_events[event_id] = row redaction_ids.update(row.redactions) - events_to_fetch = redaction_ids.difference(fetched_event_ids) - if events_to_fetch: - logger.debug("Also fetching redaction events %s", events_to_fetch) + event_ids_to_fetch = redaction_ids.difference(fetched_event_ids) + return event_ids_to_fetch + + # Grab the initial list of events requested + event_ids_to_fetch = await _fetch_event_ids_and_get_outstanding_redactions( + event_ids + ) + # Then go and recursively find all of the associated redactions + with start_active_span("recursively fetching redactions"): + while event_ids_to_fetch: + logger.debug("Also fetching redaction events %s", event_ids_to_fetch) + + event_ids_to_fetch = ( + await _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch + ) + ) # build a map from event_id to EventBase event_map: Dict[str, EventBase] = {} @@ -1424,6 +1446,8 @@ class EventsWorkerStore(SQLBaseStore): return {r["event_id"] for r in rows} + @trace + @tag_args async def have_seen_events( self, room_id: str, event_ids: Iterable[str] ) -> Set[str]: diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index 466e5137f2..b4bf49dace 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -20,6 +20,7 @@ from twisted.internet import defer from twisted.internet.defer import Deferred from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import trace_with_opname from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.util import unwrapFirstError @@ -58,6 +59,7 @@ class PartialStateEventsTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialStateEventsTracker.await_full_state") async def await_full_state(self, event_ids: Collection[str]) -> None: """Wait for all the given events to have full state. @@ -151,6 +153,7 @@ class PartialCurrentStateTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialCurrentStateTracker.await_full_state") async def await_full_state(self, room_id: str) -> None: # We add the deferred immediately so that the DB call to check for # partial state doesn't race when we unpartial the room. -- cgit 1.4.1 From 7af07f9716e6b4a2238ecd435e19bf8501360bc7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Aug 2022 21:53:37 -0500 Subject: Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls (#13588) Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls because I've see `_check_sigs_and_hash_and_fetch` take [10.41s to process 100 events](https://github.com/matrix-org/synapse/issues/13587) Fix https://github.com/matrix-org/synapse/issues/13587 Part of https://github.com/matrix-org/synapse/issues/13356 --- changelog.d/13588.misc | 1 + synapse/crypto/event_signing.py | 2 ++ synapse/events/spamcheck.py | 2 ++ synapse/federation/federation_base.py | 22 ++++++++++++++++++++++ synapse/federation/federation_client.py | 23 ++++++++++++++++++++--- 5 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13588.misc (limited to 'synapse/federation') diff --git a/changelog.d/13588.misc b/changelog.d/13588.misc new file mode 100644 index 0000000000..eca1416ceb --- /dev/null +++ b/changelog.d/13588.misc @@ -0,0 +1 @@ +Instrument `_check_sigs_and_hash_and_fetch` to trace time spent in child concurrent calls for understandable traces in Jaeger. diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 7520647d1e..23b799ac32 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -28,6 +28,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase from synapse.events.utils import prune_event, prune_event_dict +from synapse.logging.opentracing import trace from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -35,6 +36,7 @@ logger = logging.getLogger(__name__) Hasher = Callable[[bytes], "hashlib._Hash"] +@trace def check_event_content_hash( event: EventBase, hash_algorithm: Hasher = hashlib.sha256 ) -> bool: diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 4a3bfb38f1..623a2c71ea 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -32,6 +32,7 @@ from typing_extensions import Literal import synapse from synapse.api.errors import Codes +from synapse.logging.opentracing import trace from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour @@ -378,6 +379,7 @@ class SpamChecker: if check_media_file_for_spam is not None: self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam) + @trace async def check_event_for_spam( self, event: "synapse.events.EventBase" ) -> Union[Tuple[Codes, JsonDict], str]: diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 2522bf78fc..4269a98db2 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -23,6 +23,7 @@ from synapse.crypto.keyring import Keyring from synapse.events import EventBase, make_event_from_dict from synapse.events.utils import prune_event, validate_canonicaljson from synapse.http.servlet import assert_params_in_dict +from synapse.logging.opentracing import log_kv, trace from synapse.types import JsonDict, get_domain_from_id if TYPE_CHECKING: @@ -55,6 +56,7 @@ class FederationBase: self._clock = hs.get_clock() self._storage_controllers = hs.get_storage_controllers() + @trace async def _check_sigs_and_hash( self, room_version: RoomVersion, pdu: EventBase ) -> EventBase: @@ -97,17 +99,36 @@ class FederationBase: "Event %s seems to have been redacted; using our redacted copy", pdu.event_id, ) + log_kv( + { + "message": "Event seems to have been redacted; using our redacted copy", + "event_id": pdu.event_id, + } + ) else: logger.warning( "Event %s content has been tampered, redacting", pdu.event_id, ) + log_kv( + { + "message": "Event content has been tampered, redacting", + "event_id": pdu.event_id, + } + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) if spam_check != self.spam_checker.NOT_SPAM: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) + log_kv( + { + "message": "Event contains spam, redacting (to save disk space) " + "as well as soft-failing (to stop using the event in prev_events)", + "event_id": pdu.event_id, + } + ) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). redacted_event = prune_event(pdu) @@ -117,6 +138,7 @@ class FederationBase: return pdu +@trace async def _check_sigs_on_pdu( keyring: Keyring, room_version: RoomVersion, pdu: EventBase ) -> None: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 987f6dad46..7ee2974bb1 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace +from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -587,11 +587,15 @@ class FederationClient(FederationBase): Returns: A list of PDUs that have valid signatures and hashes. """ + set_tag( + SynapseTags.RESULT_PREFIX + "pdus.length", + str(len(pdus)), + ) # We limit how many PDUs we check at once, as if we try to do hundreds # of thousands of PDUs at once we see large memory spikes. - valid_pdus = [] + valid_pdus: List[EventBase] = [] async def _execute(pdu: EventBase) -> None: valid_pdu = await self._check_sigs_and_hash_and_fetch_one( @@ -607,6 +611,8 @@ class FederationClient(FederationBase): return valid_pdus + @trace + @tag_args async def _check_sigs_and_hash_and_fetch_one( self, pdu: EventBase, @@ -639,16 +645,27 @@ class FederationClient(FederationBase): except InvalidEventSignatureError as e: logger.warning( "Signature on retrieved event %s was invalid (%s). " - "Checking local store/orgin server", + "Checking local store/origin server", pdu.event_id, e, ) + log_kv( + { + "message": "Signature on retrieved event was invalid. " + "Checking local store/origin server", + "event_id": pdu.event_id, + "InvalidEventSignatureError": e, + } + ) # Check local db. res = await self.store.get_event( pdu.event_id, allow_rejected=True, allow_none=True ) + # If the PDU fails its signature check and we don't have it in our + # database, we then request it from sender's server (if that is not the + # same as `origin`). pdu_origin = get_domain_from_id(pdu.sender) if not res and pdu_origin != origin: try: -- cgit 1.4.1