diff --git a/changelog.d/11001.bugfix b/changelog.d/11001.bugfix
new file mode 100644
index 0000000000..f51ffb3481
--- /dev/null
+++ b/changelog.d/11001.bugfix
@@ -0,0 +1 @@
+ Fix a long-standing bug which meant that events received over federation were sometimes incorrectly accepted into the room state.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 1705432d7c..af2c88394d 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1256,6 +1256,10 @@ class FederationEventHandler:
Returns:
The updated context object.
+
+ Raises:
+ AuthError if we were unable to find copies of the event's auth events.
+ (Most other failures just cause us to set `context.rejected`.)
"""
# This method should only be used for non-outliers
assert not event.internal_metadata.outlier
@@ -1272,7 +1276,26 @@ class FederationEventHandler:
context.rejected = RejectedReason.AUTH_ERROR
return context
- # calculate what the auth events *should* be, to use as a basis for auth.
+ # next, check that we have all of the event's auth events.
+ #
+ # Note that this can raise AuthError, which we want to propagate to the
+ # caller rather than swallow with `context.rejected` (since we cannot be
+ # certain that there is a permanent problem with the event).
+ claimed_auth_events = await self._load_or_fetch_auth_events_for_event(
+ origin, event
+ )
+
+ # ... and check that the event passes auth at those auth events.
+ try:
+ check_auth_rules_for_event(room_version_obj, event, claimed_auth_events)
+ except AuthError as e:
+ logger.warning(
+ "While checking auth of %r against auth_events: %s", event, e
+ )
+ context.rejected = RejectedReason.AUTH_ERROR
+ return context
+
+ # now check auth against what we think the auth events *should* be.
prev_state_ids = await context.get_prev_state_ids()
auth_events_ids = self._event_auth_handler.compute_auth_events(
event, prev_state_ids, for_verification=True
@@ -1472,6 +1495,9 @@ class FederationEventHandler:
# if we have missing events, we need to fetch those events from somewhere.
#
# we start by checking if they are in the store, and then try calling /event_auth/.
+ #
+ # TODO: this code is now redundant, since it should be impossible for us to
+ # get here without already having the auth events.
if missing_auth:
have_events = await self._store.have_seen_events(
event.room_id, missing_auth
@@ -1575,7 +1601,7 @@ class FederationEventHandler:
logger.info(
"After state res: updating auth_events with new state %s",
{
- (d.type, d.state_key): d.event_id
+ d
for d in new_state.values()
if auth_events.get((d.type, d.state_key)) != d
},
@@ -1589,6 +1615,75 @@ class FederationEventHandler:
return context, auth_events
+ async def _load_or_fetch_auth_events_for_event(
+ self, destination: str, event: EventBase
+ ) -> Collection[EventBase]:
+ """Fetch this event's auth_events, from database or remote
+
+ Loads any of the auth_events that we already have from the database/cache. If
+ there are any that are missing, calls /event_auth to get the complete auth
+ chain for the event (and then attempts to load the auth_events again).
+
+ If any of the auth_events cannot be found, raises an AuthError. This can happen
+ for a number of reasons; eg: the events don't exist, or we were unable to talk
+ to `destination`, or we couldn't validate the signature on the event (which
+ in turn has multiple potential causes).
+
+ Args:
+ destination: where to send the /event_auth request. Typically the server
+ that sent us `event` in the first place.
+ event: the event whose auth_events we want
+
+ Returns:
+ all of the events in `event.auth_events`, after deduplication
+
+ Raises:
+ AuthError if we were unable to fetch the auth_events for any reason.
+ """
+ event_auth_event_ids = set(event.auth_event_ids())
+ event_auth_events = await self._store.get_events(
+ event_auth_event_ids, allow_rejected=True
+ )
+ missing_auth_event_ids = event_auth_event_ids.difference(
+ event_auth_events.keys()
+ )
+ if not missing_auth_event_ids:
+ return event_auth_events.values()
+
+ logger.info(
+ "Event %s refers to unknown auth events %s: fetching auth chain",
+ event,
+ missing_auth_event_ids,
+ )
+ try:
+ await self._get_remote_auth_chain_for_event(
+ destination, event.room_id, event.event_id
+ )
+ except Exception as e:
+ logger.warning("Failed to get auth chain for %s: %s", event, e)
+ # in this case, it's very likely we still won't have all the auth
+ # events - but we pick that up below.
+
+ # try to fetch the auth events we missed list time.
+ extra_auth_events = await self._store.get_events(
+ missing_auth_event_ids, allow_rejected=True
+ )
+ missing_auth_event_ids.difference_update(extra_auth_events.keys())
+ event_auth_events.update(extra_auth_events)
+ if not missing_auth_event_ids:
+ return event_auth_events.values()
+
+ # we still don't have all the auth events.
+ logger.warning(
+ "Missing auth events for %s: %s",
+ event,
+ shortstr(missing_auth_event_ids),
+ )
+ # the fact we can't find the auth event doesn't mean it doesn't
+ # exist, which means it is premature to store `event` as rejected.
+ # instead we raise an AuthError, which will make the caller ignore it.
+ raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found")
+
async def _get_remote_auth_chain_for_event(
self, destination: str, room_id: str, event_id: str
) -> None:
|