Fix occasional "Re-starting finished log context" from keyring (#8398)

* Fix test_verify_json_objects_for_server_awaits_previous_requests It turns out that this wasn't really testing what it thought it was testing (in particular, `check_context` was turning failures into success, which was making the tests pass even though it wasn't clear they should have been. It was also somewhat overcomplex - we can test what it was trying to test without mocking out perspectives servers. * Fix warnings about finished logcontexts in the keyring We need to make sure that we finish the key fetching magic before we run the verifying code, to ensure that we don't mess up our logcontexts.
author: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> 2020-09-25 12:29:54 +0100
committer: GitHub <noreply@github.com> 2020-09-25 12:29:54 +0100
commit: fec6f9ac178867a8e7c5410e0d25898f29bab35c (patch)
tree: ab127833049cd146c96a2fea7c85f116c26ec3ea /synapse/crypto
parent: Allow existing users to login via OpenID Connect. (#8345) (diff)
download: synapse-fec6f9ac178867a8e7c5410e0d25898f29bab35c.tar.xz
1 files changed, 44 insertions, 26 deletions
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 42e4087a92..c04ad77cf9 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -42,7 +42,6 @@ from synapse.api.errors import (
 )
 from synapse.logging.context import (
     PreserveLoggingContext,
-    current_context,
     make_deferred_yieldable,
     preserve_fn,
     run_in_background,
@@ -233,8 +232,6 @@ class Keyring:
         """
 
         try:
-            ctx = current_context()
-
             # map from server name to a set of outstanding request ids
             server_to_request_ids = {}
 
@@ -265,12 +262,8 @@ class Keyring:
 
                 # if there are no more requests for this server, we can drop the lock.
                 if not server_requests:
-                    with PreserveLoggingContext(ctx):
-                        logger.debug("Releasing key lookup lock on %s", server_name)
-
-                    # ... but not immediately, as that can cause stack explosions if
-                    # we get a long queue of lookups.
-                    self.clock.call_later(0, drop_server_lock, server_name)
+                    logger.debug("Releasing key lookup lock on %s", server_name)
+                    drop_server_lock(server_name)
 
                 return res
 
@@ -335,20 +328,32 @@ class Keyring:
                         )
 
                     # look for any requests which weren't satisfied
-                    with PreserveLoggingContext():
-                        for verify_request in remaining_requests:
-                            verify_request.key_ready.errback(
-                                SynapseError(
-                                    401,
-                                    "No key for %s with ids in %s (min_validity %i)"
-                                    % (
-                                        verify_request.server_name,
-                                        verify_request.key_ids,
-                                        verify_request.minimum_valid_until_ts,
-                                    ),
-                                    Codes.UNAUTHORIZED,
-                                )
+                    while remaining_requests:
+                        verify_request = remaining_requests.pop()
+                        rq_str = (
+                            "VerifyJsonRequest(server=%s, key_ids=%s, min_valid=%i)"
+                            % (
+                                verify_request.server_name,
+                                verify_request.key_ids,
+                                verify_request.minimum_valid_until_ts,
                             )
+                        )
+
+                        # If we run the errback immediately, it may cancel our
+                        # loggingcontext while we are still in it, so instead we
+                        # schedule it for the next time round the reactor.
+                        #
+                        # (this also ensures that we don't get a stack overflow if we
+                        # has a massive queue of lookups waiting for this server).
+                        self.clock.call_later(
+                            0,
+                            verify_request.key_ready.errback,
+                            SynapseError(
+                                401,
+                                "Failed to find any key to satisfy %s" % (rq_str,),
+                                Codes.UNAUTHORIZED,
+                            ),
+                        )
             except Exception as err:
                 # we don't really expect to get here, because any errors should already
                 # have been caught and logged. But if we do, let's log the error and make
@@ -410,10 +415,23 @@ class Keyring:
                     # key was not valid at this point
                     continue
 
-                with PreserveLoggingContext():
-                    verify_request.key_ready.callback(
-                        (server_name, key_id, fetch_key_result.verify_key)
-                    )
+                # we have a valid key for this request. If we run the callback
+                # immediately, it may cancel our loggingcontext while we are still in
+                # it, so instead we schedule it for the next time round the reactor.
+                #
+                # (this also ensures that we don't get a stack overflow if we had
+                # a massive queue of lookups waiting for this server).
+                logger.debug(
+                    "Found key %s:%s for %s",
+                    server_name,
+                    key_id,
+                    verify_request.request_name,
+                )
+                self.clock.call_later(
+                    0,
+                    verify_request.key_ready.callback,
+                    (server_name, key_id, fetch_key_result.verify_key),
+                )
                 completed.append(verify_request)
                 break
author	Richard van der Hoff <1389908+richvdh@users.noreply.github.com>	2020-09-25 12:29:54 +0100
committer	GitHub <noreply@github.com>	2020-09-25 12:29:54 +0100
commit	fec6f9ac178867a8e7c5410e0d25898f29bab35c (patch)
tree	ab127833049cd146c96a2fea7c85f116c26ec3ea /synapse/crypto
parent	Allow existing users to login via OpenID Connect. (#8345) (diff)
download	synapse-fec6f9ac178867a8e7c5410e0d25898f29bab35c.tar.xz