From 963f4309fe29206f3ba92b493e922280feea30ed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 30 Mar 2021 12:06:09 +0100
Subject: Make RateLimiter class check for ratelimit overrides (#9711)

This should fix a class of bug where we forget to check if e.g. the appservice shouldn't be ratelimited.

We also check the `ratelimit_override` table to check if the user has ratelimiting disabled. That table is really only meant to override the event sender ratelimiting, so we don't use any values from it (as they might not make sense for different rate limits), but we do infer that if ratelimiting is disabled for the user we should disabled all ratelimits.

Fixes #9663
---
 synapse/replication/http/register.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/replication')

diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py
index d005f38767..73d7477854 100644
--- a/synapse/replication/http/register.py
+++ b/synapse/replication/http/register.py
@@ -77,7 +77,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
     async def _handle_request(self, request, user_id):
         content = parse_json_object_from_request(request)
 
-        self.registration_handler.check_registration_ratelimit(content["address"])
+        await self.registration_handler.check_registration_ratelimit(content["address"])
 
         await self.registration_handler.register_with_store(
             user_id=user_id,
-- 
cgit 1.5.1


From e2b8a90897e137fd118768a3bf35b70642916eb7 Mon Sep 17 00:00:00 2001
From: Jonathan de Jong <jonathan@automatia.nl>
Date: Mon, 5 Apr 2021 15:10:18 +0200
Subject: Update mypy configuration: `no_implicit_optional = True` (#9742)

---
 changelog.d/9742.misc                          | 1 +
 mypy.ini                                       | 1 +
 synapse/handlers/account_validity.py           | 7 +++++--
 synapse/handlers/e2e_keys.py                   | 2 +-
 synapse/http/client.py                         | 2 +-
 synapse/notifier.py                            | 2 +-
 synapse/replication/tcp/redis.py               | 2 +-
 synapse/storage/databases/main/group_server.py | 4 ++--
 synapse/util/caches/deferred_cache.py          | 4 +++-
 tests/rest/client/v2_alpha/test_auth.py        | 7 +++++--
 10 files changed, 21 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/9742.misc

(limited to 'synapse/replication')

diff --git a/changelog.d/9742.misc b/changelog.d/9742.misc
new file mode 100644
index 0000000000..681ab04df8
--- /dev/null
+++ b/changelog.d/9742.misc
@@ -0,0 +1 @@
+Start linting mypy with `no_implicit_optional`.
\ No newline at end of file
diff --git a/mypy.ini b/mypy.ini
index 3ae5d45787..32e6197409 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -8,6 +8,7 @@ show_traceback = True
 mypy_path = stubs
 warn_unreachable = True
 local_partial_types = True
+no_implicit_optional = True
 
 # To find all folders that pass mypy you run:
 #
diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py
index d781bb251d..bee1447c2e 100644
--- a/synapse/handlers/account_validity.py
+++ b/synapse/handlers/account_validity.py
@@ -18,7 +18,7 @@ import email.utils
 import logging
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional
 
 from synapse.api.errors import StoreError, SynapseError
 from synapse.logging.context import make_deferred_yieldable
@@ -241,7 +241,10 @@ class AccountValidityHandler:
         return True
 
     async def renew_account_for_user(
-        self, user_id: str, expiration_ts: int = None, email_sent: bool = False
+        self,
+        user_id: str,
+        expiration_ts: Optional[int] = None,
+        email_sent: bool = False,
     ) -> int:
         """Renews the account attached to a given user by pushing back the
         expiration date by the current validity period in the server's
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 2ad9b6d930..739653a3fa 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -1008,7 +1008,7 @@ class E2eKeysHandler:
         return signature_list, failures
 
     async def _get_e2e_cross_signing_verify_key(
-        self, user_id: str, key_type: str, from_user_id: str = None
+        self, user_id: str, key_type: str, from_user_id: Optional[str] = None
     ) -> Tuple[JsonDict, str, VerifyKey]:
         """Fetch locally or remotely query for a cross-signing public key.
 
diff --git a/synapse/http/client.py b/synapse/http/client.py
index a0caba84e4..e691ba6d88 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -590,7 +590,7 @@ class SimpleHttpClient:
         uri: str,
         json_body: Any,
         args: Optional[QueryParams] = None,
-        headers: RawHeaders = None,
+        headers: Optional[RawHeaders] = None,
     ) -> Any:
         """Puts some json to the given URI.
 
diff --git a/synapse/notifier.py b/synapse/notifier.py
index d35c1f3f02..c178db57e3 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -548,7 +548,7 @@ class Notifier:
         pagination_config: PaginationConfig,
         timeout: int,
         is_guest: bool = False,
-        explicit_room_id: str = None,
+        explicit_room_id: Optional[str] = None,
     ) -> EventStreamResult:
         """For the given user and rooms, return any new events for them. If
         there are no new events wait for up to `timeout` milliseconds for any
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index 2f4d407f94..98bdeb0ec6 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -60,7 +60,7 @@ class ConstantProperty(Generic[T, V]):
 
     constant = attr.ib()  # type: V
 
-    def __get__(self, obj: Optional[T], objtype: Type[T] = None) -> V:
+    def __get__(self, obj: Optional[T], objtype: Optional[Type[T]] = None) -> V:
         return self.constant
 
     def __set__(self, obj: Optional[T], value: V):
diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py
index ac07e0197b..8f462dfc31 100644
--- a/synapse/storage/databases/main/group_server.py
+++ b/synapse/storage/databases/main/group_server.py
@@ -1027,8 +1027,8 @@ class GroupServerStore(GroupServerWorkerStore):
         user_id: str,
         is_admin: bool = False,
         is_public: bool = True,
-        local_attestation: dict = None,
-        remote_attestation: dict = None,
+        local_attestation: Optional[dict] = None,
+        remote_attestation: Optional[dict] = None,
     ) -> None:
         """Add a user to the group server.
 
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index 1adc92eb90..dd392cf694 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -283,7 +283,9 @@ class DeferredCache(Generic[KT, VT]):
         # we return a new Deferred which will be called before any subsequent observers.
         return observable.observe()
 
-    def prefill(self, key: KT, value: VT, callback: Callable[[], None] = None):
+    def prefill(
+        self, key: KT, value: VT, callback: Optional[Callable[[], None]] = None
+    ):
         callbacks = [callback] if callback else []
         self.cache.set(key, value, callbacks=callbacks)
 
diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index 9734a2159a..ed433d9333 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Union
+from typing import Optional, Union
 
 from twisted.internet.defer import succeed
 
@@ -74,7 +74,10 @@ class FallbackAuthTests(unittest.HomeserverTestCase):
         return channel
 
     def recaptcha(
-        self, session: str, expected_post_response: int, post_session: str = None
+        self,
+        session: str,
+        expected_post_response: int,
+        post_session: Optional[str] = None,
     ) -> None:
         """Get and respond to a fallback recaptcha. Returns the second request."""
         if post_session is None:
-- 
cgit 1.5.1


From 48d44ab1425ffac721b7d407823c2315cda1929a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 8 Apr 2021 08:01:14 -0400
Subject: Record more information into structured logs. (#9654)

Records additional request information into the structured logs,
e.g. the requester, IP address, etc.
---
 changelog.d/9654.feature                      |   1 +
 synapse/http/site.py                          | 112 ++++++++++++++++++++------
 synapse/logging/context.py                    |  70 +++++++++++++---
 synapse/metrics/background_process_metrics.py |  18 +++--
 synapse/replication/tcp/protocol.py           |   5 +-
 tests/crypto/test_keyring.py                  |  23 +++---
 tests/logging/test_terse_json.py              |  70 +++++++++++++++-
 tests/unittest.py                             |   2 +-
 tests/util/caches/test_descriptors.py         |   7 +-
 tests/util/test_logcontext.py                 |  35 +++-----
 10 files changed, 255 insertions(+), 88 deletions(-)
 create mode 100644 changelog.d/9654.feature

(limited to 'synapse/replication')

diff --git a/changelog.d/9654.feature b/changelog.d/9654.feature
new file mode 100644
index 0000000000..a54c96cf19
--- /dev/null
+++ b/changelog.d/9654.feature
@@ -0,0 +1 @@
+Include request information in structured logging output.
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 47754aff43..c0c873ce32 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -14,7 +14,7 @@
 import contextlib
 import logging
 import time
-from typing import Optional, Type, Union
+from typing import Optional, Tuple, Type, Union
 
 import attr
 from zope.interface import implementer
@@ -26,7 +26,11 @@ from twisted.web.server import Request, Site
 from synapse.config.server import ListenerConfig
 from synapse.http import get_request_user_agent, redact_uri
 from synapse.http.request_metrics import RequestMetrics, requests_counter
-from synapse.logging.context import LoggingContext, PreserveLoggingContext
+from synapse.logging.context import (
+    ContextRequest,
+    LoggingContext,
+    PreserveLoggingContext,
+)
 from synapse.types import Requester
 
 logger = logging.getLogger(__name__)
@@ -63,7 +67,7 @@ class SynapseRequest(Request):
 
         # The requester, if authenticated. For federation requests this is the
         # server name, for client requests this is the Requester object.
-        self.requester = None  # type: Optional[Union[Requester, str]]
+        self._requester = None  # type: Optional[Union[Requester, str]]
 
         # we can't yet create the logcontext, as we don't know the method.
         self.logcontext = None  # type: Optional[LoggingContext]
@@ -93,6 +97,31 @@ class SynapseRequest(Request):
             self.site.site_tag,
         )
 
+    @property
+    def requester(self) -> Optional[Union[Requester, str]]:
+        return self._requester
+
+    @requester.setter
+    def requester(self, value: Union[Requester, str]) -> None:
+        # Store the requester, and update some properties based on it.
+
+        # This should only be called once.
+        assert self._requester is None
+
+        self._requester = value
+
+        # A logging context should exist by now (and have a ContextRequest).
+        assert self.logcontext is not None
+        assert self.logcontext.request is not None
+
+        (
+            requester,
+            authenticated_entity,
+        ) = self.get_authenticated_entity()
+        self.logcontext.request.requester = requester
+        # If there's no authenticated entity, it was the requester.
+        self.logcontext.request.authenticated_entity = authenticated_entity or requester
+
     def get_request_id(self):
         return "%s-%i" % (self.get_method(), self.request_seq)
 
@@ -126,13 +155,60 @@ class SynapseRequest(Request):
             return self.method.decode("ascii")
         return method
 
+    def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]:
+        """
+        Get the "authenticated" entity of the request, which might be the user
+        performing the action, or a user being puppeted by a server admin.
+
+        Returns:
+            A tuple:
+                The first item is a string representing the user making the request.
+
+                The second item is a string or None representing the user who
+                authenticated when making this request. See
+                Requester.authenticated_entity.
+        """
+        # Convert the requester into a string that we can log
+        if isinstance(self._requester, str):
+            return self._requester, None
+        elif isinstance(self._requester, Requester):
+            requester = self._requester.user.to_string()
+            authenticated_entity = self._requester.authenticated_entity
+
+            # If this is a request where the target user doesn't match the user who
+            # authenticated (e.g. and admin is puppetting a user) then we return both.
+            if self._requester.user.to_string() != authenticated_entity:
+                return requester, authenticated_entity
+
+            return requester, None
+        elif self._requester is not None:
+            # This shouldn't happen, but we log it so we don't lose information
+            # and can see that we're doing something wrong.
+            return repr(self._requester), None  # type: ignore[unreachable]
+
+        return None, None
+
     def render(self, resrc):
         # this is called once a Resource has been found to serve the request; in our
         # case the Resource in question will normally be a JsonResource.
 
         # create a LogContext for this request
         request_id = self.get_request_id()
-        self.logcontext = LoggingContext(request_id, request=request_id)
+        self.logcontext = LoggingContext(
+            request_id,
+            request=ContextRequest(
+                request_id=request_id,
+                ip_address=self.getClientIP(),
+                site_tag=self.site.site_tag,
+                # The requester is going to be unknown at this point.
+                requester=None,
+                authenticated_entity=None,
+                method=self.get_method(),
+                url=self.get_redacted_uri(),
+                protocol=self.clientproto.decode("ascii", errors="replace"),
+                user_agent=get_request_user_agent(self),
+            ),
+        )
 
         # override the Server header which is set by twisted
         self.setHeader("Server", self.site.server_version_string)
@@ -277,25 +353,6 @@ class SynapseRequest(Request):
         # to the client (nb may be negative)
         response_send_time = self.finish_time - self._processing_finished_time
 
-        # Convert the requester into a string that we can log
-        authenticated_entity = None
-        if isinstance(self.requester, str):
-            authenticated_entity = self.requester
-        elif isinstance(self.requester, Requester):
-            authenticated_entity = self.requester.authenticated_entity
-
-            # If this is a request where the target user doesn't match the user who
-            # authenticated (e.g. and admin is puppetting a user) then we log both.
-            if self.requester.user.to_string() != authenticated_entity:
-                authenticated_entity = "{},{}".format(
-                    authenticated_entity,
-                    self.requester.user.to_string(),
-                )
-        elif self.requester is not None:
-            # This shouldn't happen, but we log it so we don't lose information
-            # and can see that we're doing something wrong.
-            authenticated_entity = repr(self.requester)  # type: ignore[unreachable]
-
         user_agent = get_request_user_agent(self, "-")
 
         code = str(self.code)
@@ -305,6 +362,13 @@ class SynapseRequest(Request):
             code += "!"
 
         log_level = logging.INFO if self._should_log_request() else logging.DEBUG
+
+        # If this is a request where the target user doesn't match the user who
+        # authenticated (e.g. and admin is puppetting a user) then we log both.
+        requester, authenticated_entity = self.get_authenticated_entity()
+        if authenticated_entity:
+            requester = "{}.{}".format(authenticated_entity, requester)
+
         self.site.access_logger.log(
             log_level,
             "%s - %s - {%s}"
@@ -312,7 +376,7 @@ class SynapseRequest(Request):
             ' %sB %s "%s %s %s" "%s" [%d dbevts]',
             self.getClientIP(),
             self.site.site_tag,
-            authenticated_entity,
+            requester,
             processing_time,
             response_send_time,
             usage.ru_utime,
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 03cf3c2b8e..e78343f554 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -22,7 +22,6 @@ them.
 
 See doc/log_contexts.rst for details on how this works.
 """
-
 import inspect
 import logging
 import threading
@@ -30,6 +29,7 @@ import types
 import warnings
 from typing import TYPE_CHECKING, Optional, Tuple, TypeVar, Union
 
+import attr
 from typing_extensions import Literal
 
 from twisted.internet import defer, threads
@@ -181,6 +181,29 @@ class ContextResourceUsage:
         return res
 
 
+@attr.s(slots=True)
+class ContextRequest:
+    """
+    A bundle of attributes from the SynapseRequest object.
+
+    This exists to:
+
+    * Avoid a cycle between LoggingContext and SynapseRequest.
+    * Be a single variable that can be passed from parent LoggingContexts to
+      their children.
+    """
+
+    request_id = attr.ib(type=str)
+    ip_address = attr.ib(type=str)
+    site_tag = attr.ib(type=str)
+    requester = attr.ib(type=Optional[str])
+    authenticated_entity = attr.ib(type=Optional[str])
+    method = attr.ib(type=str)
+    url = attr.ib(type=str)
+    protocol = attr.ib(type=str)
+    user_agent = attr.ib(type=str)
+
+
 LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"]
 
 
@@ -256,7 +279,7 @@ class LoggingContext:
         self,
         name: Optional[str] = None,
         parent_context: "Optional[LoggingContext]" = None,
-        request: Optional[str] = None,
+        request: Optional[ContextRequest] = None,
     ) -> None:
         self.previous_context = current_context()
         self.name = name
@@ -281,7 +304,11 @@ class LoggingContext:
         self.parent_context = parent_context
 
         if self.parent_context is not None:
-            self.parent_context.copy_to(self)
+            # we track the current request_id
+            self.request = self.parent_context.request
+
+            # we also track the current scope:
+            self.scope = self.parent_context.scope
 
         if request is not None:
             # the request param overrides the request from the parent context
@@ -289,7 +316,7 @@ class LoggingContext:
 
     def __str__(self) -> str:
         if self.request:
-            return str(self.request)
+            return self.request.request_id
         return "%s@%x" % (self.name, id(self))
 
     @classmethod
@@ -556,8 +583,23 @@ class LoggingContextFilter(logging.Filter):
         # we end up in a death spiral of infinite loops, so let's check, for
         # robustness' sake.
         if context is not None:
-            # Logging is interested in the request.
-            record.request = context.request  # type: ignore
+            # Logging is interested in the request ID. Note that for backwards
+            # compatibility this is stored as the "request" on the record.
+            record.request = str(context)  # type: ignore
+
+            # Add some data from the HTTP request.
+            request = context.request
+            if request is None:
+                return True
+
+            record.ip_address = request.ip_address  # type: ignore
+            record.site_tag = request.site_tag  # type: ignore
+            record.requester = request.requester  # type: ignore
+            record.authenticated_entity = request.authenticated_entity  # type: ignore
+            record.method = request.method  # type: ignore
+            record.url = request.url  # type: ignore
+            record.protocol = request.protocol  # type: ignore
+            record.user_agent = request.user_agent  # type: ignore
 
         return True
 
@@ -630,8 +672,8 @@ def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSe
 def nested_logging_context(suffix: str) -> LoggingContext:
     """Creates a new logging context as a child of another.
 
-    The nested logging context will have a 'request' made up of the parent context's
-    request, plus the given suffix.
+    The nested logging context will have a 'name' made up of the parent context's
+    name, plus the given suffix.
 
     CPU/db usage stats will be added to the parent context's on exit.
 
@@ -641,7 +683,7 @@ def nested_logging_context(suffix: str) -> LoggingContext:
             # ... do stuff
 
     Args:
-        suffix: suffix to add to the parent context's 'request'.
+        suffix: suffix to add to the parent context's 'name'.
 
     Returns:
         LoggingContext: new logging context.
@@ -653,11 +695,17 @@ def nested_logging_context(suffix: str) -> LoggingContext:
         )
         parent_context = None
         prefix = ""
+        request = None
     else:
         assert isinstance(curr_context, LoggingContext)
         parent_context = curr_context
-        prefix = str(parent_context.request)
-    return LoggingContext(parent_context=parent_context, request=prefix + "-" + suffix)
+        prefix = str(parent_context.name)
+        request = parent_context.request
+    return LoggingContext(
+        prefix + "-" + suffix,
+        parent_context=parent_context,
+        request=request,
+    )
 
 
 def preserve_fn(f):
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index b56986d8e7..e8a9096c03 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -16,7 +16,7 @@
 import logging
 import threading
 from functools import wraps
-from typing import TYPE_CHECKING, Dict, Optional, Set
+from typing import TYPE_CHECKING, Dict, Optional, Set, Union
 
 from prometheus_client.core import REGISTRY, Counter, Gauge
 
@@ -199,11 +199,11 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar
         _background_process_start_count.labels(desc).inc()
         _background_process_in_flight_count.labels(desc).inc()
 
-        with BackgroundProcessLoggingContext(desc, "%s-%i" % (desc, count)) as context:
+        with BackgroundProcessLoggingContext(desc, count) as context:
             try:
                 ctx = noop_context_manager()
                 if bg_start_span:
-                    ctx = start_active_span(desc, tags={"request_id": context.request})
+                    ctx = start_active_span(desc, tags={"request_id": str(context)})
                 with ctx:
                     return await maybe_awaitable(func(*args, **kwargs))
             except Exception:
@@ -242,13 +242,19 @@ class BackgroundProcessLoggingContext(LoggingContext):
     processes.
     """
 
-    __slots__ = ["_proc"]
+    __slots__ = ["_id", "_proc"]
 
-    def __init__(self, name: str, request: Optional[str] = None):
-        super().__init__(name, request=request)
+    def __init__(self, name: str, id: Optional[Union[int, str]] = None):
+        super().__init__(name)
+        self._id = id
 
         self._proc = _BackgroundProcess(name, self)
 
+    def __str__(self) -> str:
+        if self._id is not None:
+            return "%s-%s" % (self.name, self._id)
+        return "%s@%x" % (self.name, id(self))
+
     def start(self, rusage: "Optional[resource._RUsage]"):
         """Log context has started running (again)."""
 
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index e829add257..d10d574246 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -184,8 +184,9 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
 
         # a logcontext which we use for processing incoming commands. We declare it as a
         # background process so that the CPU stats get reported to prometheus.
-        ctx_name = "replication-conn-%s" % self.conn_id
-        self._logging_context = BackgroundProcessLoggingContext(ctx_name, ctx_name)
+        self._logging_context = BackgroundProcessLoggingContext(
+            "replication-conn", self.conn_id
+        )
 
     def connectionMade(self):
         logger.info("[%s] Connection established", self.id())
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 30fcc4c1bf..946482b7e7 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -16,6 +16,7 @@ import time
 
 from mock import Mock
 
+import attr
 import canonicaljson
 import signedjson.key
 import signedjson.sign
@@ -68,6 +69,11 @@ class MockPerspectiveServer:
         signedjson.sign.sign_json(res, self.server_name, self.key)
 
 
+@attr.s(slots=True)
+class FakeRequest:
+    id = attr.ib()
+
+
 @logcontext_clean
 class KeyringTestCase(unittest.HomeserverTestCase):
     def check_context(self, val, expected):
@@ -89,7 +95,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         first_lookup_deferred = Deferred()
 
         async def first_lookup_fetch(keys_to_fetch):
-            self.assertEquals(current_context().request, "context_11")
+            self.assertEquals(current_context().request.id, "context_11")
             self.assertEqual(keys_to_fetch, {"server10": {get_key_id(key1): 0}})
 
             await make_deferred_yieldable(first_lookup_deferred)
@@ -102,9 +108,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         mock_fetcher.get_keys.side_effect = first_lookup_fetch
 
         async def first_lookup():
-            with LoggingContext("context_11") as context_11:
-                context_11.request = "context_11"
-
+            with LoggingContext("context_11", request=FakeRequest("context_11")):
                 res_deferreds = kr.verify_json_objects_for_server(
                     [("server10", json1, 0, "test10"), ("server11", {}, 0, "test11")]
                 )
@@ -130,7 +134,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         # should block rather than start a second call
 
         async def second_lookup_fetch(keys_to_fetch):
-            self.assertEquals(current_context().request, "context_12")
+            self.assertEquals(current_context().request.id, "context_12")
             return {
                 "server10": {
                     get_key_id(key1): FetchKeyResult(get_verify_key(key1), 100)
@@ -142,9 +146,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         second_lookup_state = [0]
 
         async def second_lookup():
-            with LoggingContext("context_12") as context_12:
-                context_12.request = "context_12"
-
+            with LoggingContext("context_12", request=FakeRequest("context_12")):
                 res_deferreds_2 = kr.verify_json_objects_for_server(
                     [("server10", json1, 0, "test")]
                 )
@@ -589,10 +591,7 @@ def get_key_id(key):
 
 @defer.inlineCallbacks
 def run_in_context(f, *args, **kwargs):
-    with LoggingContext("testctx") as ctx:
-        # we set the "request" prop to make it easier to follow what's going on in the
-        # logs.
-        ctx.request = "testctx"
+    with LoggingContext("testctx"):
         rv = yield f(*args, **kwargs)
     return rv
 
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 48a74e2eee..bfe0d11c93 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -12,15 +12,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import json
 import logging
-from io import StringIO
+from io import BytesIO, StringIO
+
+from mock import Mock, patch
+
+from twisted.web.server import Request
 
+from synapse.http.site import SynapseRequest
 from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
 from synapse.logging.context import LoggingContext, LoggingContextFilter
 
 from tests.logging import LoggerCleanupMixin
+from tests.server import FakeChannel
 from tests.unittest import TestCase
 
 
@@ -120,7 +125,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         handler.addFilter(LoggingContextFilter())
         logger = self.get_logger(handler)
 
-        with LoggingContext(request="test"):
+        with LoggingContext("name"):
             logger.info("Hello there, %s!", "wally")
 
         log = self.get_log_line()
@@ -134,4 +139,61 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         ]
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")
-        self.assertEqual(log["request"], "test")
+        self.assertTrue(log["request"].startswith("name@"))
+
+    def test_with_request_context(self):
+        """
+        Information from the logging context request should be added to the JSON response.
+        """
+        handler = logging.StreamHandler(self.output)
+        handler.setFormatter(JsonFormatter())
+        handler.addFilter(LoggingContextFilter())
+        logger = self.get_logger(handler)
+
+        # A full request isn't needed here.
+        site = Mock(spec=["site_tag", "server_version_string", "getResourceFor"])
+        site.site_tag = "test-site"
+        site.server_version_string = "Server v1"
+        request = SynapseRequest(FakeChannel(site, None))
+        # Call requestReceived to finish instantiating the object.
+        request.content = BytesIO()
+        # Partially skip some of the internal processing of SynapseRequest.
+        request._started_processing = Mock()
+        request.request_metrics = Mock(spec=["name"])
+        with patch.object(Request, "render"):
+            request.requestReceived(b"POST", b"/_matrix/client/versions", b"1.1")
+
+        # Also set the requester to ensure the processing works.
+        request.requester = "@foo:test"
+
+        with LoggingContext(parent_context=request.logcontext):
+            logger.info("Hello there, %s!", "wally")
+
+        log = self.get_log_line()
+
+        # The terse logger includes additional request information, if possible.
+        expected_log_keys = [
+            "log",
+            "level",
+            "namespace",
+            "request",
+            "ip_address",
+            "site_tag",
+            "requester",
+            "authenticated_entity",
+            "method",
+            "url",
+            "protocol",
+            "user_agent",
+        ]
+        self.assertCountEqual(log.keys(), expected_log_keys)
+        self.assertEqual(log["log"], "Hello there, wally!")
+        self.assertTrue(log["request"].startswith("POST-"))
+        self.assertEqual(log["ip_address"], "127.0.0.1")
+        self.assertEqual(log["site_tag"], "test-site")
+        self.assertEqual(log["requester"], "@foo:test")
+        self.assertEqual(log["authenticated_entity"], "@foo:test")
+        self.assertEqual(log["method"], "POST")
+        self.assertEqual(log["url"], "/_matrix/client/versions")
+        self.assertEqual(log["protocol"], "1.1")
+        self.assertEqual(log["user_agent"], "")
diff --git a/tests/unittest.py b/tests/unittest.py
index 58a4daa1ec..57b6a395c7 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -471,7 +471,7 @@ class HomeserverTestCase(TestCase):
         kwargs["config"] = config_obj
 
         async def run_bg_updates():
-            with LoggingContext("run_bg_updates", request="run_bg_updates-1"):
+            with LoggingContext("run_bg_updates"):
                 while not await stor.db_pool.updates.has_completed_background_updates():
                     await stor.db_pool.updates.do_next_background_update(1)
 
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index afb11b9caf..e434e21aee 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -661,14 +661,13 @@ class CachedListDescriptorTestCase(unittest.TestCase):
 
             @descriptors.cachedList("fn", "args1")
             async def list_fn(self, args1, arg2):
-                assert current_context().request == "c1"
+                assert current_context().name == "c1"
                 # we want this to behave like an asynchronous function
                 await run_on_reactor()
-                assert current_context().request == "c1"
+                assert current_context().name == "c1"
                 return self.mock(args1, arg2)
 
-        with LoggingContext() as c1:
-            c1.request = "c1"
+        with LoggingContext("c1") as c1:
             obj = Cls()
             obj.mock.return_value = {10: "fish", 20: "chips"}
             d1 = obj.list_fn([10, 20], 2)
diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py
index 58ee918f65..5d9c4665aa 100644
--- a/tests/util/test_logcontext.py
+++ b/tests/util/test_logcontext.py
@@ -17,11 +17,10 @@ from .. import unittest
 
 class LoggingContextTestCase(unittest.TestCase):
     def _check_test_key(self, value):
-        self.assertEquals(current_context().request, value)
+        self.assertEquals(current_context().name, value)
 
     def test_with_context(self):
-        with LoggingContext() as context_one:
-            context_one.request = "test"
+        with LoggingContext("test"):
             self._check_test_key("test")
 
     @defer.inlineCallbacks
@@ -30,15 +29,13 @@ class LoggingContextTestCase(unittest.TestCase):
 
         @defer.inlineCallbacks
         def competing_callback():
-            with LoggingContext() as competing_context:
-                competing_context.request = "competing"
+            with LoggingContext("competing"):
                 yield clock.sleep(0)
                 self._check_test_key("competing")
 
         reactor.callLater(0, competing_callback)
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
+        with LoggingContext("one"):
             yield clock.sleep(0)
             self._check_test_key("one")
 
@@ -47,9 +44,7 @@ class LoggingContextTestCase(unittest.TestCase):
 
         callback_completed = [False]
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
-
+        with LoggingContext("one"):
             # fire off function, but don't wait on it.
             d2 = run_in_background(function)
 
@@ -133,9 +128,7 @@ class LoggingContextTestCase(unittest.TestCase):
 
         sentinel_context = current_context()
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
-
+        with LoggingContext("one"):
             d1 = make_deferred_yieldable(blocking_function())
             # make sure that the context was reset by make_deferred_yieldable
             self.assertIs(current_context(), sentinel_context)
@@ -149,9 +142,7 @@ class LoggingContextTestCase(unittest.TestCase):
     def test_make_deferred_yieldable_with_chained_deferreds(self):
         sentinel_context = current_context()
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
-
+        with LoggingContext("one"):
             d1 = make_deferred_yieldable(_chained_deferred_function())
             # make sure that the context was reset by make_deferred_yieldable
             self.assertIs(current_context(), sentinel_context)
@@ -166,9 +157,7 @@ class LoggingContextTestCase(unittest.TestCase):
         """Check that make_deferred_yieldable does the right thing when its
         argument isn't actually a deferred"""
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
-
+        with LoggingContext("one"):
             d1 = make_deferred_yieldable("bum")
             self._check_test_key("one")
 
@@ -177,9 +166,9 @@ class LoggingContextTestCase(unittest.TestCase):
             self._check_test_key("one")
 
     def test_nested_logging_context(self):
-        with LoggingContext(request="foo"):
+        with LoggingContext("foo"):
             nested_context = nested_logging_context(suffix="bar")
-            self.assertEqual(nested_context.request, "foo-bar")
+            self.assertEqual(nested_context.name, "foo-bar")
 
     @defer.inlineCallbacks
     def test_make_deferred_yieldable_with_await(self):
@@ -193,9 +182,7 @@ class LoggingContextTestCase(unittest.TestCase):
 
         sentinel_context = current_context()
 
-        with LoggingContext() as context_one:
-            context_one.request = "one"
-
+        with LoggingContext("one"):
             d1 = make_deferred_yieldable(blocking_function())
             # make sure that the context was reset by make_deferred_yieldable
             self.assertIs(current_context(), sentinel_context)
-- 
cgit 1.5.1


From b076bc276e881b262048307b6a226061d96c4a8d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 20 Apr 2021 09:19:00 -0400
Subject: Always use the name as the log ID. (#9829)

As far as I can tell our logging contexts are meant to log the request ID, or sometimes the request ID followed by a suffix (this is generally stored in the name field of LoggingContext). There's also code to log the name@memory location, but I'm not sure this is ever used.

This simplifies the code paths to require every logging context to have a name and use that in logging. For sub-contexts (created via nested_logging_contexts, defer_to_threadpool, Measure) we use the current context's str (which becomes their name or the string "sentinel") and then potentially modify that (e.g. add a suffix).
---
 changelog.d/9829.bugfix                       |  1 +
 synapse/logging/context.py                    | 14 ++++----------
 synapse/metrics/background_process_metrics.py | 15 ++++-----------
 synapse/replication/tcp/protocol.py           |  2 +-
 synapse/util/metrics.py                       | 14 +++++++++-----
 tests/logging/test_terse_json.py              |  6 ++++--
 tests/test_federation.py                      |  2 +-
 tests/util/caches/test_descriptors.py         |  6 ++----
 8 files changed, 26 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/9829.bugfix

(limited to 'synapse/replication')

diff --git a/changelog.d/9829.bugfix b/changelog.d/9829.bugfix
new file mode 100644
index 0000000000..d0c1e49fd8
--- /dev/null
+++ b/changelog.d/9829.bugfix
@@ -0,0 +1 @@
+Fix the log lines of nested logging contexts.
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index e78343f554..dbd7d3a33a 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -277,7 +277,7 @@ class LoggingContext:
 
     def __init__(
         self,
-        name: Optional[str] = None,
+        name: str,
         parent_context: "Optional[LoggingContext]" = None,
         request: Optional[ContextRequest] = None,
     ) -> None:
@@ -315,9 +315,7 @@ class LoggingContext:
             self.request = request
 
     def __str__(self) -> str:
-        if self.request:
-            return self.request.request_id
-        return "%s@%x" % (self.name, id(self))
+        return self.name
 
     @classmethod
     def current_context(cls) -> LoggingContextOrSentinel:
@@ -694,17 +692,13 @@ def nested_logging_context(suffix: str) -> LoggingContext:
             "Starting nested logging context from sentinel context: metrics will be lost"
         )
         parent_context = None
-        prefix = ""
-        request = None
     else:
         assert isinstance(curr_context, LoggingContext)
         parent_context = curr_context
-        prefix = str(parent_context.name)
-        request = parent_context.request
+    prefix = str(curr_context)
     return LoggingContext(
         prefix + "-" + suffix,
         parent_context=parent_context,
-        request=request,
     )
 
 
@@ -895,7 +889,7 @@ def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
         parent_context = curr_context
 
     def g():
-        with LoggingContext(parent_context=parent_context):
+        with LoggingContext(str(curr_context), parent_context=parent_context):
             return f(*args, **kwargs)
 
     return make_deferred_yieldable(threads.deferToThreadPool(reactor, threadpool, g))
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index e8a9096c03..78e9cfbc26 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -16,7 +16,7 @@
 import logging
 import threading
 from functools import wraps
-from typing import TYPE_CHECKING, Dict, Optional, Set, Union
+from typing import TYPE_CHECKING, Dict, Optional, Set
 
 from prometheus_client.core import REGISTRY, Counter, Gauge
 
@@ -199,7 +199,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar
         _background_process_start_count.labels(desc).inc()
         _background_process_in_flight_count.labels(desc).inc()
 
-        with BackgroundProcessLoggingContext(desc, count) as context:
+        with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context:
             try:
                 ctx = noop_context_manager()
                 if bg_start_span:
@@ -242,19 +242,12 @@ class BackgroundProcessLoggingContext(LoggingContext):
     processes.
     """
 
-    __slots__ = ["_id", "_proc"]
+    __slots__ = ["_proc"]
 
-    def __init__(self, name: str, id: Optional[Union[int, str]] = None):
+    def __init__(self, name: str):
         super().__init__(name)
-        self._id = id
-
         self._proc = _BackgroundProcess(name, self)
 
-    def __str__(self) -> str:
-        if self._id is not None:
-            return "%s-%s" % (self.name, self._id)
-        return "%s@%x" % (self.name, id(self))
-
     def start(self, rusage: "Optional[resource._RUsage]"):
         """Log context has started running (again)."""
 
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index d10d574246..ba753318bd 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -185,7 +185,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         # a logcontext which we use for processing incoming commands. We declare it as a
         # background process so that the CPU stats get reported to prometheus.
         self._logging_context = BackgroundProcessLoggingContext(
-            "replication-conn", self.conn_id
+            "replication-conn-%s" % (self.conn_id,)
         )
 
     def connectionMade(self):
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 1023c856d1..019cfa17cc 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -105,7 +105,13 @@ class Measure:
         "start",
     ]
 
-    def __init__(self, clock, name):
+    def __init__(self, clock, name: str):
+        """
+        Args:
+            clock: A n object with a "time()" method, which returns the current
+                time in seconds.
+            name: The name of the metric to report.
+        """
         self.clock = clock
         self.name = name
         curr_context = current_context()
@@ -118,10 +124,8 @@ class Measure:
         else:
             assert isinstance(curr_context, LoggingContext)
             parent_context = curr_context
-        self._logging_context = LoggingContext(
-            "Measure[%s]" % (self.name,), parent_context
-        )
-        self.start = None
+        self._logging_context = LoggingContext(str(curr_context), parent_context)
+        self.start = None  # type: Optional[int]
 
     def __enter__(self) -> "Measure":
         if self.start is not None:
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 215fd8b0f9..ecf873e2ab 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -138,7 +138,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         ]
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")
-        self.assertTrue(log["request"].startswith("name@"))
+        self.assertEqual(log["request"], "name")
 
     def test_with_request_context(self):
         """
@@ -165,7 +165,9 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         # Also set the requester to ensure the processing works.
         request.requester = "@foo:test"
 
-        with LoggingContext(parent_context=request.logcontext):
+        with LoggingContext(
+            request.get_request_id(), parent_context=request.logcontext
+        ):
             logger.info("Hello there, %s!", "wally")
 
         log = self.get_log_line()
diff --git a/tests/test_federation.py b/tests/test_federation.py
index 8928597d17..382cedbd5d 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -134,7 +134,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
             }
         )
 
-        with LoggingContext():
+        with LoggingContext("test-context"):
             failure = self.get_failure(
                 self.handler.on_receive_pdu(
                     "test.serv", lying_event, sent_to_us_directly=True
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 2d1f9360e0..8c082e7432 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -231,8 +231,7 @@ class DescriptorTestCase(unittest.TestCase):
 
         @defer.inlineCallbacks
         def do_lookup():
-            with LoggingContext() as c1:
-                c1.name = "c1"
+            with LoggingContext("c1") as c1:
                 r = yield obj.fn(1)
                 self.assertEqual(current_context(), c1)
             return r
@@ -274,8 +273,7 @@ class DescriptorTestCase(unittest.TestCase):
 
         @defer.inlineCallbacks
         def do_lookup():
-            with LoggingContext() as c1:
-                c1.name = "c1"
+            with LoggingContext("c1") as c1:
                 try:
                     d = obj.fn(1)
                     self.assertEqual(
-- 
cgit 1.5.1