diff options
-rw-r--r-- | changelog.d/3722.bugfix | 1 | ||||
-rw-r--r-- | changelog.d/3727.misc | 1 | ||||
-rw-r--r-- | synapse/crypto/keyclient.py | 8 | ||||
-rw-r--r-- | synapse/federation/transport/server.py | 6 | ||||
-rw-r--r-- | synapse/http/request_metrics.py | 13 | ||||
-rw-r--r-- | synapse/metrics/background_process_metrics.py | 26 |
6 files changed, 42 insertions, 13 deletions
diff --git a/changelog.d/3722.bugfix b/changelog.d/3722.bugfix new file mode 100644 index 0000000000..16cbaf76cb --- /dev/null +++ b/changelog.d/3722.bugfix @@ -0,0 +1 @@ +Fix error collecting prometheus metrics when run on dedicated thread due to threading concurrency issues diff --git a/changelog.d/3727.misc b/changelog.d/3727.misc new file mode 100644 index 0000000000..0b83220d90 --- /dev/null +++ b/changelog.d/3727.misc @@ -0,0 +1 @@ +Log failure to authenticate remote servers as warnings (without stack traces) diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index c20a32096a..e94400b8e2 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -18,7 +18,9 @@ import logging from canonicaljson import json from twisted.internet import defer, reactor +from twisted.internet.error import ConnectError from twisted.internet.protocol import Factory +from twisted.names.error import DomainError from twisted.web.http import HTTPClient from synapse.http.endpoint import matrix_federation_endpoint @@ -47,12 +49,14 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): server_response, server_certificate = yield protocol.remote_key defer.returnValue((server_response, server_certificate)) except SynapseKeyClientError as e: - logger.exception("Error getting key for %r" % (server_name,)) + logger.warn("Error getting key for %r: %s", server_name, e) if e.status.startswith("4"): # Don't retry for 4xx responses. raise IOError("Cannot get key for %r" % server_name) + except (ConnectError, DomainError) as e: + logger.warn("Error getting key for %r: %s", server_name, e) except Exception as e: - logger.exception(e) + logger.exception("Error getting key for %r", server_name) raise IOError("Cannot get key for %r" % server_name) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 77969a4f38..7a993fd1cf 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -261,10 +261,10 @@ class BaseFederationServlet(object): except NoAuthenticationError: origin = None if self.REQUIRE_AUTH: - logger.exception("authenticate_request failed") + logger.warn("authenticate_request failed: missing authentication") raise - except Exception: - logger.exception("authenticate_request failed") + except Exception as e: + logger.warn("authenticate_request failed: %s", e) raise if origin: diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 588e280571..72c2654678 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +import threading from prometheus_client.core import Counter, Histogram @@ -111,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter( # The set of all in flight requests, set[RequestMetrics] _in_flight_requests = set() +# Protects the _in_flight_requests set from concurrent accesss +_in_flight_requests_lock = threading.Lock() + def _get_in_flight_counts(): """Returns a count of all in flight requests by (method, server_name) @@ -120,7 +124,8 @@ def _get_in_flight_counts(): """ # Cast to a list to prevent it changing while the Prometheus # thread is collecting metrics - reqs = list(_in_flight_requests) + with _in_flight_requests_lock: + reqs = list(_in_flight_requests) for rm in reqs: rm.update_metrics() @@ -154,10 +159,12 @@ class RequestMetrics(object): # to the "in flight" metrics. self._request_stats = self.start_context.get_resource_usage() - _in_flight_requests.add(self) + with _in_flight_requests_lock: + _in_flight_requests.add(self) def stop(self, time_sec, request): - _in_flight_requests.discard(self) + with _in_flight_requests_lock: + _in_flight_requests.discard(self) context = LoggingContext.current_context() diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index ce678d5f75..167167be0a 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import threading + import six from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily @@ -78,6 +80,9 @@ _background_process_counts = dict() # type: dict[str, int] # of process descriptions that no longer have any active processes. _background_processes = dict() # type: dict[str, set[_BackgroundProcess]] +# A lock that covers the above dicts +_bg_metrics_lock = threading.Lock() + class _Collector(object): """A custom metrics collector for the background process metrics. @@ -92,7 +97,11 @@ class _Collector(object): labels=["name"], ) - for desc, processes in six.iteritems(_background_processes): + # We copy the dict so that it doesn't change from underneath us + with _bg_metrics_lock: + _background_processes_copy = dict(_background_processes) + + for desc, processes in six.iteritems(_background_processes_copy): background_process_in_flight_count.add_metric( (desc,), len(processes), ) @@ -167,19 +176,26 @@ def run_as_background_process(desc, func, *args, **kwargs): """ @defer.inlineCallbacks def run(): - count = _background_process_counts.get(desc, 0) - _background_process_counts[desc] = count + 1 + with _bg_metrics_lock: + count = _background_process_counts.get(desc, 0) + _background_process_counts[desc] = count + 1 + _background_process_start_count.labels(desc).inc() with LoggingContext(desc) as context: context.request = "%s-%i" % (desc, count) proc = _BackgroundProcess(desc, context) - _background_processes.setdefault(desc, set()).add(proc) + + with _bg_metrics_lock: + _background_processes.setdefault(desc, set()).add(proc) + try: yield func(*args, **kwargs) finally: proc.update_metrics() - _background_processes[desc].remove(proc) + + with _bg_metrics_lock: + _background_processes[desc].remove(proc) with PreserveLoggingContext(): return run() |