From 20fb08ec803c324a58e0f972935a27debaac133f Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 29 Sep 2023 14:52:48 +0300
Subject: Downgrade repl stream time out error to warning (#16401)

This is because if a worker reaches ~100% CPU then everything starts
lagging and we hit the log line a lot. When at error we invoke sentry
and that has a lot of overhead, which then puts even more pressure on
the worker.
---
 changelog.d/16401.misc            | 1 +
 synapse/replication/tcp/client.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16401.misc

diff --git a/changelog.d/16401.misc b/changelog.d/16401.misc
new file mode 100644
index 0000000000..86d2749a08
--- /dev/null
+++ b/changelog.d/16401.misc
@@ -0,0 +1 @@
+Downgrade replication stream time out error log lines to warning.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 1c7946522a..f4f2b29e96 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -339,7 +339,7 @@ class ReplicationDataHandler:
             try:
                 await make_deferred_yieldable(deferred)
             except defer.TimeoutError:
-                logger.error(
+                logger.warning(
                     "Timed out waiting for repl stream %r to reach %s (%s)"
                     "; currently at: %s",
                     stream_name,
-- 
cgit 1.4.1