From 8756d5c87efc5637da55c9e21d2a4eb2369ba693 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Wed, 26 Oct 2022 12:45:41 +0200 Subject: Save login tokens in database (#13844) * Save login tokens in database Signed-off-by: Quentin Gliech * Add upgrade notes * Track login token reuse in a Prometheus metric Signed-off-by: Quentin Gliech --- docs/upgrade.md | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'docs/upgrade.md') diff --git a/docs/upgrade.md b/docs/upgrade.md index b81385b191..78c34d0c15 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,15 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.71.0 + +## Removal of the `generate_short_term_login_token` module API method + +As announced with the release of [Synapse 1.69.0](#deprecation-of-the-generate_short_term_login_token-module-api-method), the deprecated `generate_short_term_login_token` module method has been removed. + +Modules relying on it can instead use the `create_login_token` method. + + # Upgrading to v1.69.0 ## Changes to the receipts replication streams -- cgit 1.5.1 From aa70556699e649f46f51a198fb104eecdc0d311b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Oct 2022 13:29:23 -0500 Subject: Check appservice user interest against the local users instead of all users (`get_users_in_room` mis-use) (#13958) --- changelog.d/13958.bugfix | 1 + docs/upgrade.md | 19 ++++ synapse/appservice/__init__.py | 16 ++- synapse/storage/databases/main/appservice.py | 17 ++- synapse/storage/databases/main/roommember.py | 3 + tests/appservice/test_appservice.py | 10 +- tests/handlers/test_appservice.py | 162 ++++++++++++++++++++++++++- 7 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 changelog.d/13958.bugfix (limited to 'docs/upgrade.md') diff --git a/changelog.d/13958.bugfix b/changelog.d/13958.bugfix new file mode 100644 index 0000000000..f9f651bfdc --- /dev/null +++ b/changelog.d/13958.bugfix @@ -0,0 +1 @@ +Check appservice user interest against the local users instead of all users in the room to align with [MSC3905](https://github.com/matrix-org/matrix-spec-proposals/pull/3905). diff --git a/docs/upgrade.md b/docs/upgrade.md index 78c34d0c15..f095bbc3a6 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -97,6 +97,25 @@ As announced with the release of [Synapse 1.69.0](#deprecation-of-the-generate_s Modules relying on it can instead use the `create_login_token` method. +## Changes to the events received by application services (interest) + +To align with spec (changed in +[MSC3905](https://github.com/matrix-org/matrix-spec-proposals/pull/3905)), Synapse now +only considers local users to be interesting. In other words, the `users` namespace +regex is only be applied against local users of the homeserver. + +Please note, this probably doesn't affect the expected behavior of your application +service, since an interesting local user in a room still means all messages in the room +(from local or remote users) will still be considered interesting. And matching a room +with the `rooms` or `aliases` namespace regex will still consider all events sent in the +room to be interesting to the application service. + +If one of your application service's `users` regex was intending to match a remote user, +this will no longer match as you expect. The behavioral mismatch between matching all +local users and some remote users is why the spec was changed/clarified and this +caveat is no longer supported. + + # Upgrading to v1.69.0 ## Changes to the receipts replication streams diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 0dfa00df44..500bdde3a9 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -172,12 +172,24 @@ class ApplicationService: Returns: True if this service would like to know about this room. """ - member_list = await store.get_users_in_room( + # We can use `get_local_users_in_room(...)` here because an application service + # can only be interested in local users of the server it's on (ignore any remote + # users that might match the user namespace regex). + # + # In the future, we can consider re-using + # `store.get_app_service_users_in_room` which is very similar to this + # function but has a slightly worse performance than this because we + # have an early escape-hatch if we find a single user that the + # appservice is interested in. The juice would be worth the squeeze if + # `store.get_app_service_users_in_room` was used in more places besides + # an experimental MSC. But for now we can avoid doing more work and + # barely using it later. + local_user_ids = await store.get_local_users_in_room( room_id, on_invalidate=cache_context.invalidate ) # check joined member events - for user_id in member_list: + for user_id in local_user_ids: if self.is_interested_in_user(user_id): return True return False diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 64b70a7b28..63046c0527 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -157,10 +157,23 @@ class ApplicationServiceWorkerStore(RoomMemberWorkerStore): app_service: "ApplicationService", cache_context: _CacheContext, ) -> List[str]: - users_in_room = await self.get_users_in_room( + """ + Get all users in a room that the appservice controls. + + Args: + room_id: The room to check in. + app_service: The application service to check interest/control against + + Returns: + List of user IDs that the appservice controls. + """ + # We can use `get_local_users_in_room(...)` here because an application service + # can only be interested in local users of the server it's on (ignore any remote + # users that might match the user namespace regex). + local_users_in_room = await self.get_local_users_in_room( room_id, on_invalidate=cache_context.invalidate ) - return list(filter(app_service.is_interested_in_user, users_in_room)) + return list(filter(app_service.is_interested_in_user, local_users_in_room)) class ApplicationServiceStore(ApplicationServiceWorkerStore): diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ab708b0ba5..e56a13f21e 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -152,6 +152,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): the forward extremities of those rooms will exclude most members. We may also calculate room state incorrectly for such rooms and believe that a member is or is not in the room when the opposite is true. + + Note: If you only care about users in the room local to the homeserver, use + `get_local_users_in_room(...)` instead which will be more performant. """ return await self.db_pool.simple_select_onecol( table="current_state_events", diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py index 3018d3fc6f..d4dccfc2f0 100644 --- a/tests/appservice/test_appservice.py +++ b/tests/appservice/test_appservice.py @@ -43,7 +43,7 @@ class ApplicationServiceTestCase(unittest.TestCase): self.store = Mock() self.store.get_aliases_for_room = simple_async_mock([]) - self.store.get_users_in_room = simple_async_mock([]) + self.store.get_local_users_in_room = simple_async_mock([]) @defer.inlineCallbacks def test_regex_user_id_prefix_match(self): @@ -129,7 +129,7 @@ class ApplicationServiceTestCase(unittest.TestCase): self.store.get_aliases_for_room = simple_async_mock( ["#irc_foobar:matrix.org", "#athing:matrix.org"] ) - self.store.get_users_in_room = simple_async_mock([]) + self.store.get_local_users_in_room = simple_async_mock([]) self.assertTrue( ( yield defer.ensureDeferred( @@ -184,7 +184,7 @@ class ApplicationServiceTestCase(unittest.TestCase): self.store.get_aliases_for_room = simple_async_mock( ["#xmpp_foobar:matrix.org", "#athing:matrix.org"] ) - self.store.get_users_in_room = simple_async_mock([]) + self.store.get_local_users_in_room = simple_async_mock([]) self.assertFalse( ( yield defer.ensureDeferred( @@ -203,7 +203,7 @@ class ApplicationServiceTestCase(unittest.TestCase): self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*")) self.event.sender = "@irc_foobar:matrix.org" self.store.get_aliases_for_room = simple_async_mock(["#irc_barfoo:matrix.org"]) - self.store.get_users_in_room = simple_async_mock([]) + self.store.get_local_users_in_room = simple_async_mock([]) self.assertTrue( ( yield defer.ensureDeferred( @@ -236,7 +236,7 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_member_list_match(self): self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*")) # Note that @irc_fo:here is the AS user. - self.store.get_users_in_room = simple_async_mock( + self.store.get_local_users_in_room = simple_async_mock( ["@alice:here", "@irc_fo:here", "@bob:here"] ) self.store.get_aliases_for_room = simple_async_mock([]) diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index 7e4570f990..144e49d0fd 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -22,7 +22,7 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin import synapse.storage -from synapse.api.constants import EduTypes +from synapse.api.constants import EduTypes, EventTypes from synapse.appservice import ( ApplicationService, TransactionOneTimeKeyCounts, @@ -36,7 +36,7 @@ from synapse.util import Clock from synapse.util.stringutils import random_string from tests import unittest -from tests.test_utils import make_awaitable, simple_async_mock +from tests.test_utils import event_injection, make_awaitable, simple_async_mock from tests.unittest import override_config from tests.utils import MockClock @@ -390,15 +390,16 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): receipts.register_servlets, ] - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer): + self.hs = hs # Mock the ApplicationServiceScheduler's _TransactionController's send method so that # we can track any outgoing ephemeral events self.send_mock = simple_async_mock() - hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock + hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock # type: ignore[assignment] # Mock out application services, and allow defining our own in tests self._services: List[ApplicationService] = [] - self.hs.get_datastores().main.get_app_services = Mock( + self.hs.get_datastores().main.get_app_services = Mock( # type: ignore[assignment] return_value=self._services ) @@ -416,6 +417,157 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): "exclusive_as_user", "password", self.exclusive_as_user_device_id ) + def _notify_interested_services(self): + # This is normally set in `notify_interested_services` but we need to call the + # internal async version so the reactor gets pushed to completion. + self.hs.get_application_service_handler().current_max += 1 + self.get_success( + self.hs.get_application_service_handler()._notify_interested_services( + RoomStreamToken( + None, self.hs.get_application_service_handler().current_max + ) + ) + ) + + @parameterized.expand( + [ + ("@local_as_user:test", True), + # Defining remote users in an application service user namespace regex is a + # footgun since the appservice might assume that it'll receive all events + # sent by that remote user, but it will only receive events in rooms that + # are shared with a local user. So we just remove this footgun possibility + # entirely and we won't notify the application service based on remote + # users. + ("@remote_as_user:remote", False), + ] + ) + def test_match_interesting_room_members( + self, interesting_user: str, should_notify: bool + ): + """ + Test to make sure that a interesting user (local or remote) in the room is + notified as expected when someone else in the room sends a message. + """ + # Register an application service that's interested in the `interesting_user` + interested_appservice = self._register_application_service( + namespaces={ + ApplicationService.NS_USERS: [ + { + "regex": interesting_user, + "exclusive": False, + }, + ], + }, + ) + + # Create a room + alice = self.register_user("alice", "pass") + alice_access_token = self.login("alice", "pass") + room_id = self.helper.create_room_as(room_creator=alice, tok=alice_access_token) + + # Join the interesting user to the room + self.get_success( + event_injection.inject_member_event( + self.hs, room_id, interesting_user, "join" + ) + ) + # Kick the appservice into checking this membership event to get the event out + # of the way + self._notify_interested_services() + # We don't care about the interesting user join event (this test is making sure + # the next thing works) + self.send_mock.reset_mock() + + # Send a message from an uninteresting user + self.helper.send_event( + room_id, + type=EventTypes.Message, + content={ + "msgtype": "m.text", + "body": "message from uninteresting user", + }, + tok=alice_access_token, + ) + # Kick the appservice into checking this new event + self._notify_interested_services() + + if should_notify: + self.send_mock.assert_called_once() + ( + service, + events, + _ephemeral, + _to_device_messages, + _otks, + _fbks, + _device_list_summary, + ) = self.send_mock.call_args[0] + + # Even though the message came from an uninteresting user, it should still + # notify us because the interesting user is joined to the room where the + # message was sent. + self.assertEqual(service, interested_appservice) + self.assertEqual(events[0]["type"], "m.room.message") + self.assertEqual(events[0]["sender"], alice) + else: + self.send_mock.assert_not_called() + + def test_application_services_receive_events_sent_by_interesting_local_user(self): + """ + Test to make sure that a messages sent from a local user can be interesting and + picked up by the appservice. + """ + # Register an application service that's interested in all local users + interested_appservice = self._register_application_service( + namespaces={ + ApplicationService.NS_USERS: [ + { + "regex": ".*", + "exclusive": False, + }, + ], + }, + ) + + # Create a room + alice = self.register_user("alice", "pass") + alice_access_token = self.login("alice", "pass") + room_id = self.helper.create_room_as(room_creator=alice, tok=alice_access_token) + + # We don't care about interesting events before this (this test is making sure + # the next thing works) + self.send_mock.reset_mock() + + # Send a message from the interesting local user + self.helper.send_event( + room_id, + type=EventTypes.Message, + content={ + "msgtype": "m.text", + "body": "message from interesting local user", + }, + tok=alice_access_token, + ) + # Kick the appservice into checking this new event + self._notify_interested_services() + + self.send_mock.assert_called_once() + ( + service, + events, + _ephemeral, + _to_device_messages, + _otks, + _fbks, + _device_list_summary, + ) = self.send_mock.call_args[0] + + # Events sent from an interesting local user should also be picked up as + # interesting to the appservice. + self.assertEqual(service, interested_appservice) + self.assertEqual(events[0]["type"], "m.room.message") + self.assertEqual(events[0]["sender"], alice) + def test_sending_read_receipt_batches_to_application_services(self): """Tests that a large batch of read receipts are sent correctly to interested application services. -- cgit 1.5.1 From 6546308c1e7d3eff316631a5909151dc6c7a9e1e Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 2 Nov 2022 17:33:45 +0000 Subject: Disable legacy Prometheus metric names by default. They can still be re-enabled for now, but they will be removed altogether in Synapse 1.73.0. (#14353) --- CHANGES.md | 9 +++++++++ changelog.d/14353.removal | 1 + docs/upgrade.md | 16 ++++++++++++++++ docs/usage/configuration/config_documentation.md | 4 ++-- synapse/config/metrics.py | 2 +- 5 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 changelog.d/14353.removal (limited to 'docs/upgrade.md') diff --git a/CHANGES.md b/CHANGES.md index 113ad0d1ee..6bafdd3fad 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse (Next) (2022-11-01) +========================= + +Please note that, as announced in the release notes for Synapse 1.69.0, legacy Prometheus metric names are now disabled by default. +They will be removed altogether in Synapse 1.73.0. +If not already done, server administrators should update their dashboards and alerting rules to avoid using the deprecated metric names. +See the [upgrade notes](https://matrix-org.github.io/synapse/v1.71/upgrade.html#upgrading-to-v1710) for more details. + + Synapse 1.71.0rc1 (2022-11-01) ============================== diff --git a/changelog.d/14353.removal b/changelog.d/14353.removal new file mode 100644 index 0000000000..fc42aa9106 --- /dev/null +++ b/changelog.d/14353.removal @@ -0,0 +1 @@ +Disable legacy Prometheus metric names by default. They can still be re-enabled for now, but they will be removed altogether in Synapse 1.73.0. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index f095bbc3a6..41b06cc253 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -116,6 +116,22 @@ local users and some remote users is why the spec was changed/clarified and this caveat is no longer supported. +## Legacy Prometheus metric names are now disabled by default + +Synapse v1.71.0 disables legacy Prometheus metric names by default. +For administrators that still rely on them and have not yet had chance to update their +uses of the metrics, it's still possible to specify `enable_legacy_metrics: true` in +the configuration to re-enable them temporarily. + +Synapse v1.73.0 will **remove legacy metric names altogether** and at that point, +it will no longer be possible to re-enable them. + +If you do not use metrics or you have already updated your Grafana dashboard(s), +Prometheus console(s) and alerting rule(s), there is no action needed. + +See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names). + + # Upgrading to v1.69.0 ## Changes to the receipts replication streams diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 44358faf59..9a6bd08d01 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2441,8 +2441,8 @@ enable_metrics: true Set to `true` to publish both legacy and non-legacy Prometheus metric names, or to `false` to only publish non-legacy Prometheus metric names. -Defaults to `true`. Has no effect if `enable_metrics` is `false`. -**In Synapse v1.71.0, this will default to `false` before being removed in Synapse v1.73.0.** +Defaults to `false`. Has no effect if `enable_metrics` is `false`. +**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.** Legacy metric names include: - metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index bb065f9f2f..6034a0346e 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -43,7 +43,7 @@ class MetricsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_metrics = config.get("enable_metrics", False) - self.enable_legacy_metrics = config.get("enable_legacy_metrics", True) + self.enable_legacy_metrics = config.get("enable_legacy_metrics", False) self.report_stats = config.get("report_stats", None) self.report_stats_endpoint = config.get( -- cgit 1.5.1 From a5fcdea090c2396c30dd07c357ce4d9c90004c34 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 8 Nov 2022 17:17:13 +0000 Subject: Remove support for PostgreSQL 10 (#14392) Signed-off-by: Sean Quah --- .ci/scripts/calculate_jobs.py | 2 +- .github/workflows/tests.yml | 2 +- changelog.d/14392.removal | 1 + docs/upgrade.md | 10 ++++++++++ synapse/storage/engines/postgres.py | 4 ++-- 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14392.removal (limited to 'docs/upgrade.md') diff --git a/.ci/scripts/calculate_jobs.py b/.ci/scripts/calculate_jobs.py index c53d4d5ff1..b48174bea2 100755 --- a/.ci/scripts/calculate_jobs.py +++ b/.ci/scripts/calculate_jobs.py @@ -54,7 +54,7 @@ trial_postgres_tests = [ { "python-version": "3.7", "database": "postgres", - "postgres-version": "10", + "postgres-version": "11", "extras": "all", } ] diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fea33abd12..2bc237a0ba 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -409,7 +409,7 @@ jobs: matrix: include: - python-version: "3.7" - postgres-version: "10" + postgres-version: "11" - python-version: "3.11" postgres-version: "14" diff --git a/changelog.d/14392.removal b/changelog.d/14392.removal new file mode 100644 index 0000000000..e96b3de2bd --- /dev/null +++ b/changelog.d/14392.removal @@ -0,0 +1 @@ +Remove support for PostgreSQL 10. diff --git a/docs/upgrade.md b/docs/upgrade.md index 41b06cc253..2aa353e496 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,16 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.72.0 + +## Dropping support for PostgreSQL 10 + +In line with our [deprecation policy](deprecation_policy.md), we've dropped +support for PostgreSQL 10, as it is no longer supported upstream. + +This release of Synapse requires PostgreSQL 11+. + + # Upgrading to v1.71.0 ## Removal of the `generate_short_term_login_token` module API method diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 9bf74bbf59..0c4fd88914 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -81,8 +81,8 @@ class PostgresEngine( allow_unsafe_locale = self.config.get("allow_unsafe_locale", False) # Are we on a supported PostgreSQL version? - if not allow_outdated_version and self._version < 100000: - raise RuntimeError("Synapse requires PostgreSQL 10 or above.") + if not allow_outdated_version and self._version < 110000: + raise RuntimeError("Synapse requires PostgreSQL 11 or above.") with db_conn.cursor() as txn: txn.execute("SHOW SERVER_ENCODING") -- cgit 1.5.1 From 9af2be192a759c22d189b72cc0a7580cd9de8a37 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 24 Nov 2022 09:09:17 +0000 Subject: Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0. (#14538) --- changelog.d/14538.removal | 1 + docs/upgrade.md | 22 ++ docs/usage/configuration/config_documentation.md | 25 -- synapse/app/_base.py | 16 +- synapse/app/generic_worker.py | 1 - synapse/app/homeserver.py | 1 - synapse/config/metrics.py | 2 - synapse/metrics/__init__.py | 7 +- synapse/metrics/_legacy_exposition.py | 288 ----------------------- synapse/metrics/_twisted_exposition.py | 38 +++ tests/storage/test_event_metrics.py | 7 +- 11 files changed, 70 insertions(+), 338 deletions(-) create mode 100644 changelog.d/14538.removal delete mode 100644 synapse/metrics/_legacy_exposition.py create mode 100644 synapse/metrics/_twisted_exposition.py (limited to 'docs/upgrade.md') diff --git a/changelog.d/14538.removal b/changelog.d/14538.removal new file mode 100644 index 0000000000..d2035ce82a --- /dev/null +++ b/changelog.d/14538.removal @@ -0,0 +1 @@ +Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index 2aa353e496..4fe9e4f02e 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,28 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.73.0 + +## Legacy Prometheus metric names have now been removed + +Synapse v1.69.0 included the deprecation of legacy Prometheus metric names +and offered an option to disable them. +Synapse v1.71.0 disabled legacy Prometheus metric names by default. + +This version, v1.73.0, removes those legacy Prometheus metric names entirely. +This also means that the `enable_legacy_metrics` configuration option has been +removed; it will no longer be possible to re-enable the legacy metric names. + +If you use metrics and have not yet updated your Grafana dashboard(s), +Prometheus console(s) or alerting rule(s), please consider doing so when upgrading +to this version. +Note that the included Grafana dashboard was updated in v1.72.0 to correct some +metric names which were missed when legacy metrics were disabled by default. + +See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names) +for more context. + + # Upgrading to v1.72.0 ## Dropping support for PostgreSQL 10 diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f5937dd902..fae2771fad 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2437,31 +2437,6 @@ Example configuration: enable_metrics: true ``` --- -### `enable_legacy_metrics` - -Set to `true` to publish both legacy and non-legacy Prometheus metric names, -or to `false` to only publish non-legacy Prometheus metric names. -Defaults to `false`. Has no effect if `enable_metrics` is `false`. -**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.** - -Legacy metric names include: -- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; -- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard. - -These legacy metric names are unconventional and not compliant with OpenMetrics standards. -They are included for backwards compatibility. - -Example configuration: -```yaml -enable_legacy_metrics: false -``` - -See https://github.com/matrix-org/synapse/issues/11106 for context. - -*Since v1.67.0.* - -**Will be removed in v1.73.0.** ---- ### `sentry` Use this option to enable sentry integration. Provide the DSN assigned to you by sentry diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 41d2732ef9..a5aa2185a2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -266,26 +266,18 @@ def register_start( reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics( - bind_addresses: Iterable[str], port: int, enable_legacy_metric_names: bool -) -> None: +def listen_metrics(bind_addresses: Iterable[str], port: int) -> None: """ Start Prometheus metrics server. """ from prometheus_client import start_http_server as start_http_server_prometheus - from synapse.metrics import ( - RegistryProxy, - start_http_server as start_http_server_legacy, - ) + from synapse.metrics import RegistryProxy for host in bind_addresses: logger.info("Starting metrics listener on %s:%d", host, port) - if enable_legacy_metric_names: - start_http_server_legacy(port, addr=host, registry=RegistryProxy) - else: - _set_prometheus_client_use_created_metrics(False) - start_http_server_prometheus(port, addr=host, registry=RegistryProxy) + _set_prometheus_client_use_created_metrics(False) + start_http_server_prometheus(port, addr=host, registry=RegistryProxy) def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 74909b7d4a..46dc731696 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -320,7 +320,6 @@ class GenericWorkerServer(HomeServer): _base.listen_metrics( listener.bind_addresses, listener.port, - enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, ) else: logger.warning("Unsupported listener type: %s", listener.type) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 4f4fee4782..b9be558c7e 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -265,7 +265,6 @@ class SynapseHomeServer(HomeServer): _base.listen_metrics( listener.bind_addresses, listener.port, - enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, ) else: # this shouldn't happen, as the listener type should have been checked diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 6034a0346e..8c1c9bd12d 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -43,8 +43,6 @@ class MetricsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_metrics = config.get("enable_metrics", False) - self.enable_legacy_metrics = config.get("enable_legacy_metrics", False) - self.report_stats = config.get("report_stats", None) self.report_stats_endpoint = config.get( "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index c3d3daf877..b01372565d 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -47,11 +47,7 @@ from twisted.python.threadpool import ThreadPool # This module is imported for its side effects; flake8 needn't warn that it's unused. import synapse.metrics._reactor_metrics # noqa: F401 from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager -from synapse.metrics._legacy_exposition import ( - MetricsResource, - generate_latest, - start_http_server, -) +from synapse.metrics._twisted_exposition import MetricsResource, generate_latest from synapse.metrics._types import Collector from synapse.util import SYNAPSE_VERSION @@ -474,7 +470,6 @@ __all__ = [ "Collector", "MetricsResource", "generate_latest", - "start_http_server", "LaterGauge", "InFlightGauge", "GaugeBucketCollector", diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py deleted file mode 100644 index 1459f9d224..0000000000 --- a/synapse/metrics/_legacy_exposition.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2015-2019 Prometheus Python Client Developers -# Copyright 2019 Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code is based off `prometheus_client/exposition.py` from version 0.7.1. - -Due to the renaming of metrics in prometheus_client 0.4.0, this customised -vendoring of the code will emit both the old versions that Synapse dashboards -expect, and the newer "best practice" version of the up-to-date official client. -""" -import logging -import math -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer -from socketserver import ThreadingMixIn -from typing import Any, Dict, List, Type, Union -from urllib.parse import parse_qs, urlparse - -from prometheus_client import REGISTRY, CollectorRegistry -from prometheus_client.core import Sample - -from twisted.web.resource import Resource -from twisted.web.server import Request - -logger = logging.getLogger(__name__) -CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" - - -def floatToGoString(d: Union[int, float]) -> str: - d = float(d) - if d == math.inf: - return "+Inf" - elif d == -math.inf: - return "-Inf" - elif math.isnan(d): - return "NaN" - else: - s = repr(d) - dot = s.find(".") - # Go switches to exponents sooner than Python. - # We only need to care about positive values for le/quantile. - if d > 0 and dot > 6: - mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.") - return f"{mantissa}e+0{dot - 1}" - return s - - -def sample_line(line: Sample, name: str) -> str: - if line.labels: - labelstr = "{{{0}}}".format( - ",".join( - [ - '{}="{}"'.format( - k, - v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), - ) - for k, v in sorted(line.labels.items()) - ] - ) - ) - else: - labelstr = "" - timestamp = "" - if line.timestamp is not None: - # Convert to milliseconds. - timestamp = f" {int(float(line.timestamp) * 1000):d}" - return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) - - -# Mapping from new metric names to legacy metric names. -# We translate these back to their old names when exposing them through our -# legacy vendored exporter. -# Only this legacy exposition module applies these name changes. -LEGACY_METRIC_NAMES = { - "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits", - "synapse_util_caches_cache_size": "synapse_util_caches_cache:size", - "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size", - "synapse_util_caches_cache": "synapse_util_caches_cache:total", - "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size", - "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", - "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", - "synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total", - "synapse_federation_client_sent_pdu_destinations": "synapse_federation_client_sent_pdu_destinations:total", - "synapse_federation_client_sent_pdu_destinations_count": "synapse_federation_client_sent_pdu_destinations:count", - "synapse_admin_mau_current": "synapse_admin_mau:current", - "synapse_admin_mau_max": "synapse_admin_mau:max", - "synapse_admin_mau_registered_reserved_users": "synapse_admin_mau:registered_reserved_users", -} - - -def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: - """ - Generate metrics in legacy format. Modern metrics are generated directly - by prometheus-client. - """ - - output = [] - - for metric in registry.collect(): - if not metric.samples: - # No samples, don't bother. - continue - - # Translate to legacy metric name if it has one. - mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name) - mnewname = metric.name - mtype = metric.type - - # OpenMetrics -> Prometheus - if mtype == "counter": - mnewname = mnewname + "_total" - elif mtype == "info": - mtype = "gauge" - mnewname = mnewname + "_info" - elif mtype == "stateset": - mtype = "gauge" - elif mtype == "gaugehistogram": - mtype = "histogram" - elif mtype == "unknown": - mtype = "untyped" - - # Output in the old format for compatibility. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mname} {mtype}\n") - - om_samples: Dict[str, List[str]] = {} - for s in metric.samples: - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == mname + suffix: - # OpenMetrics specific sample, put in a gauge at the end. - # (these come from gaugehistograms which don't get renamed, - # so no need to faff with mnewname) - om_samples.setdefault(suffix, []).append(sample_line(s, s.name)) - break - else: - newname = s.name.replace(mnewname, mname) - if ":" in newname and newname.endswith("_total"): - newname = newname[: -len("_total")] - output.append(sample_line(s, newname)) - - for suffix, lines in sorted(om_samples.items()): - if emit_help: - output.append( - "# HELP {}{} {}\n".format( - mname, - suffix, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mname}{suffix} gauge\n") - output.extend(lines) - - # Get rid of the weird colon things while we're at it - if mtype == "counter": - mnewname = mnewname.replace(":total", "") - mnewname = mnewname.replace(":", "_") - - if mname == mnewname: - continue - - # Also output in the new format, if it's different. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mnewname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mnewname} {mtype}\n") - - for s in metric.samples: - # Get rid of the OpenMetrics specific samples (we should already have - # dealt with them above anyway.) - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == mname + suffix: - break - else: - sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name) - output.append( - sample_line(s, sample_name.replace(":total", "").replace(":", "_")) - ) - - return "".join(output).encode("utf-8") - - -class MetricsHandler(BaseHTTPRequestHandler): - """HTTP handler that gives metrics from ``REGISTRY``.""" - - registry = REGISTRY - - def do_GET(self) -> None: - registry = self.registry - params = parse_qs(urlparse(self.path).query) - - if "help" in params: - emit_help = True - else: - emit_help = False - - try: - output = generate_latest(registry, emit_help=emit_help) - except Exception: - self.send_error(500, "error generating metric output") - raise - try: - self.send_response(200) - self.send_header("Content-Type", CONTENT_TYPE_LATEST) - self.send_header("Content-Length", str(len(output))) - self.end_headers() - self.wfile.write(output) - except BrokenPipeError as e: - logger.warning( - "BrokenPipeError when serving metrics (%s). Did Prometheus restart?", e - ) - - def log_message(self, format: str, *args: Any) -> None: - """Log nothing.""" - - @classmethod - def factory(cls, registry: CollectorRegistry) -> Type: - """Returns a dynamic MetricsHandler class tied - to the passed registry. - """ - # This implementation relies on MetricsHandler.registry - # (defined above and defaulted to REGISTRY). - - # As we have unicode_literals, we need to create a str() - # object for type(). - cls_name = str(cls.__name__) - MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) - return MyMetricsHandler - - -class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): - """Thread per request HTTP server.""" - - # Make worker threads "fire and forget". Beginning with Python 3.7 this - # prevents a memory leak because ``ThreadingMixIn`` starts to gather all - # non-daemon threads in a list in order to join on them at server close. - # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the - # same as Python 3.7's ``ThreadingHTTPServer``. - daemon_threads = True - - -def start_http_server( - port: int, addr: str = "", registry: CollectorRegistry = REGISTRY -) -> None: - """Starts an HTTP server for prometheus metrics as a daemon thread""" - CustomMetricsHandler = MetricsHandler.factory(registry) - httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) - t = threading.Thread(target=httpd.serve_forever) - t.daemon = True - t.start() - - -class MetricsResource(Resource): - """ - Twisted ``Resource`` that serves prometheus metrics. - """ - - isLeaf = True - - def __init__(self, registry: CollectorRegistry = REGISTRY): - self.registry = registry - - def render_GET(self, request: Request) -> bytes: - request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) - response = generate_latest(self.registry) - request.setHeader(b"Content-Length", str(len(response))) - return response diff --git a/synapse/metrics/_twisted_exposition.py b/synapse/metrics/_twisted_exposition.py new file mode 100644 index 0000000000..0abcd14953 --- /dev/null +++ b/synapse/metrics/_twisted_exposition.py @@ -0,0 +1,38 @@ +# Copyright 2015-2019 Prometheus Python Client Developers +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from prometheus_client import REGISTRY, CollectorRegistry, generate_latest + +from twisted.web.resource import Resource +from twisted.web.server import Request + +CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" + + +class MetricsResource(Resource): + """ + Twisted ``Resource`` that serves prometheus metrics. + """ + + isLeaf = True + + def __init__(self, registry: CollectorRegistry = REGISTRY): + self.registry = registry + + def render_GET(self, request: Request) -> bytes: + request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) + response = generate_latest(self.registry) + request.setHeader(b"Content-Length", str(len(response))) + return response diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index 088fbb247b..6f1135eef4 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from prometheus_client import generate_latest -from synapse.metrics import REGISTRY, generate_latest +from synapse.metrics import REGISTRY from synapse.types import UserID, create_requester from tests.unittest import HomeserverTestCase @@ -53,8 +54,8 @@ class ExtremStatisticsTestCase(HomeserverTestCase): items = list( filter( - lambda x: b"synapse_forward_extremities_" in x, - generate_latest(REGISTRY, emit_help=False).split(b"\n"), + lambda x: b"synapse_forward_extremities_" in x and b"# HELP" not in x, + generate_latest(REGISTRY).split(b"\n"), ) ) -- cgit 1.5.1