From e02f4b7de287f49476ee5b60e3e439eb8bb11047 Mon Sep 17 00:00:00 2001
From: Nils
Date: Mon, 31 Jul 2023 13:25:06 +0200
Subject: Do not expose Admin API in caddy reverse proxy example (#16027)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nils ANDRÉ-CHANG
---
 docs/reverse_proxy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
index 06337e7c00..fe9519b4b6 100644
--- a/docs/reverse_proxy.md
+++ b/docs/reverse_proxy.md
@@ -95,7 +95,7 @@ matrix.example.com {
 }

 example.com:8448 {
-    reverse_proxy localhost:8008
+    reverse_proxy /_matrix/* localhost:8008
 }
 ```
-- cgit 1.5.1

From f0a860908ba0309c89c9dba452d99b4f9c6928f7 Mon Sep 17 00:00:00 2001
From: Mathieu Velten
Date: Thu, 3 Aug 2023 20:36:55 +0200
Subject: Allow config of the backoff algorithm for the federation client.
 (#15754)

Adds three new configuration variables:

* destination_min_retry_interval is identical to before (10m).
* destination_retry_multiplier is now 2 instead of 5, so the maximum
  value is reached more slowly.
* destination_max_retry_interval is one week instead of (essentially)
  infinity. Capping this means destinations will continue to be retried
  occasionally instead of being lost forever. The previous value was
  2 ^ 62 milliseconds.
---
 changelog.d/15754.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 11 +++++++++
 synapse/config/federation.py                     | 18 +++++++++++++++
 synapse/util/retryutils.py                       | 29 +++++++++++++-----------
 tests/storage/test_transactions.py               |  9 ++++++--
 tests/util/test_retryutils.py                    | 22 +++++++++---------
 6 files changed, 64 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/15754.misc

(limited to 'docs')

diff --git a/changelog.d/15754.misc b/changelog.d/15754.misc
new file mode 100644
index 0000000000..4314d415a3
--- /dev/null
+++ b/changelog.d/15754.misc
@@ -0,0 +1 @@
+Allow for the configuration of the backoff algorithm for federation destinations.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 4e6fcd085a..c32608da2b 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1242,6 +1242,14 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Defaults to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Defaults to 10 attempts.

+The following options control the retry logic when communicating with a specific homeserver destination.
+Unlike the previous configuration options, these values apply across all requests
+for a given destination and the state of the backoff is stored in the database.
+
+* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
+* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent failure. Defaults to 2.
+* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week.
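+
+As an illustration (this sketch is not part of Synapse; the names and defaults
+are taken from the options above), the schedule these three options produce
+looks like the following, ignoring the random jitter the real implementation
+adds:
+
+```python
+def backoff_schedule(
+    min_interval_ms: int = 10 * 60 * 1000,           # destination_min_retry_interval
+    multiplier: int = 2,                             # destination_retry_multiplier
+    max_interval_ms: int = 7 * 24 * 60 * 60 * 1000,  # destination_max_retry_interval
+    attempts: int = 12,
+) -> list:
+    """Return the successive retry intervals, in milliseconds."""
+    intervals = []
+    interval = min_interval_ms
+    for _ in range(attempts):
+        intervals.append(interval)
+        # Each failure multiplies the interval, up to the configured cap.
+        interval = min(interval * multiplier, max_interval_ms)
+    return intervals
+
+# With the defaults this yields 10m, 20m, 40m, ... capped at one week.
+```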
+
 Example configuration:
 ```yaml
 federation:
@@ -1250,6 +1258,9 @@ federation:
   max_long_retry_delay: 100s
   max_short_retries: 5
   max_long_retries: 20
+  destination_min_retry_interval: 30s
+  destination_retry_multiplier: 5
+  destination_max_retry_interval: 12h
 ```
 ---
 ## Caching
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 0e1cb8b6e3..97636039b8 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -65,5 +65,23 @@ class FederationConfig(Config):
         self.max_long_retries = federation_config.get("max_long_retries", 10)
         self.max_short_retries = federation_config.get("max_short_retries", 3)

+        # Allow for the configuration of the backoff algorithm used
+        # when trying to reach an unavailable destination.
+        # Unlike the previous configuration options, these values apply
+        # across multiple requests, and the state of the backoff is
+        # stored in the database.
+        self.destination_min_retry_interval_ms = Config.parse_duration(
+            federation_config.get("destination_min_retry_interval", "10m")
+        )
+        self.destination_retry_multiplier = federation_config.get(
+            "destination_retry_multiplier", 2
+        )
+        self.destination_max_retry_interval_ms = min(
+            Config.parse_duration(
+                federation_config.get("destination_max_retry_interval", "7d")
+            ),
+            # Set a hard limit to avoid overflowing the database column.
+            2**62,
+        )


 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index dcc037b982..27e9fc976c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -27,15 +27,6 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-# the initial backoff, after the first transaction fails
-MIN_RETRY_INTERVAL = 10 * 60 * 1000
-
-# how much we multiply the backoff by after each subsequent fail
-RETRY_MULTIPLIER = 5
-
-# a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2**62
-

 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts: int, retry_interval: int, destination: str):
@@ -169,6 +160,16 @@ class RetryDestinationLimiter:
         self.notifier = notifier
         self.replication_client = replication_client

+        self.destination_min_retry_interval_ms = (
+            self.store.hs.config.federation.destination_min_retry_interval_ms
+        )
+        self.destination_retry_multiplier = (
+            self.store.hs.config.federation.destination_retry_multiplier
+        )
+        self.destination_max_retry_interval_ms = (
+            self.store.hs.config.federation.destination_max_retry_interval_ms
+        )
+
     def __enter__(self) -> None:
         pass

@@ -220,13 +221,15 @@ class RetryDestinationLimiter:
             # We couldn't connect.
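             # The update below implements jittered exponential backoff: each
             # failure multiplies the previous interval by the configured
             # multiplier and by a random factor between 0.8 and 1.4, and the
             # result is clamped to the configured maximum.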
             if self.retry_interval:
                 self.retry_interval = int(
-                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                    self.retry_interval
+                    * self.destination_retry_multiplier
+                    * random.uniform(0.8, 1.4)
                 )

-                if self.retry_interval >= MAX_RETRY_INTERVAL:
-                    self.retry_interval = MAX_RETRY_INTERVAL
+                if self.retry_interval >= self.destination_max_retry_interval_ms:
+                    self.retry_interval = self.destination_max_retry_interval_ms
             else:
-                self.retry_interval = MIN_RETRY_INTERVAL
+                self.retry_interval = self.destination_min_retry_interval_ms

             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 2fab84a529..ef06b50dbb 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -17,7 +17,6 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.server import HomeServer
 from synapse.storage.databases.main.transactions import DestinationRetryTimings
 from synapse.util import Clock
-from synapse.util.retryutils import MAX_RETRY_INTERVAL

 from tests.unittest import HomeserverTestCase

@@ -57,8 +56,14 @@ class TransactionStoreTestCase(HomeserverTestCase):
         self.get_success(d)

     def test_large_destination_retry(self) -> None:
+        max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
+        )
         d = self.store.set_destination_retry_timings(
-            "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
+            "example.com",
+            max_retry_interval_ms,
+            max_retry_interval_ms,
+            max_retry_interval_ms,
         )
         self.get_success(d)

diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 5f8f4e76b5..1277e1a865 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -11,12 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from synapse.util.retryutils import (
-    MIN_RETRY_INTERVAL,
-    RETRY_MULTIPLIER,
-    NotRetryingDestination,
-    get_retry_limiter,
-)
+from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter

 from tests.unittest import HomeserverTestCase

@@ -42,6 +37,11 @@ class RetryLimiterTestCase(HomeserverTestCase):

         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

+        min_retry_interval_ms = (
+            self.hs.config.federation.destination_min_retry_interval_ms
+        )
+        retry_multiplier = self.hs.config.federation.destination_retry_multiplier
+
         self.pump(1)
         try:
             with limiter:
@@ -57,7 +57,7 @@
         assert new_timings is not None
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, failure_ts)
-        self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL)
+        self.assertEqual(new_timings.retry_interval, min_retry_interval_ms)

         # now if we try again we should get a failure
         self.get_failure(
@@ -68,7 +68,7 @@
         # advance the clock and try again
         #

-        self.pump(MIN_RETRY_INTERVAL)
+        self.pump(min_retry_interval_ms)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

         self.pump(1)
@@ -87,16 +87,16 @@
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, retry_ts)
         self.assertGreaterEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5
         )
         self.assertLessEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0
         )

         #
         # one more go, with success
         #
-        self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0)
+        self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

         self.pump(1)
-- cgit 1.5.1

From 0a5f4f766514b84aff84ff17dffd5301a437c797 Mon Sep 17 00:00:00 2001
From: Shay
Date: Thu, 3 Aug 2023 11:43:51 -0700
Subject: Move support for application service query parameter authorization
 behind a configuration option (#16017)

---
 changelog.d/16017.removal                        |  1 +
 docs/upgrade.md                                  | 16 ++++-
 docs/usage/configuration/config_documentation.md | 14 ++++
 synapse/appservice/api.py                        | 34 +++++++---
 synapse/config/appservice.py                     |  8 +++
 tests/appservice/test_api.py                     | 85 ++++++++++++++++++++++--
 6 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16017.removal

(limited to 'docs')

diff --git a/changelog.d/16017.removal b/changelog.d/16017.removal
new file mode 100644
index 0000000000..6b72442892
--- /dev/null
+++ b/changelog.d/16017.removal
@@ -0,0 +1 @@
+Move support for application service query parameter authorization behind a configuration option.
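For context, here is a minimal sketch (not part of this patch; the URL and
token below are placeholders) contrasting the two authorization styles this
option toggles:

```python
import requests

AS_URL = "https://appservice.example.com/_matrix/app/v1/users/@alice:example.com"
HS_TOKEN = "hs-token-placeholder"

# Spec-compliant style: the homeserver's token travels only in the
# Authorization header.
requests.get(AS_URL, headers={"Authorization": f"Bearer {HS_TOKEN}"})

# Legacy style (use_appservice_legacy_authorization: true): the token is
# additionally duplicated into the query string, where it is more likely to
# be captured in access logs and by intermediaries.
requests.get(
    AS_URL,
    params={"access_token": HS_TOKEN},
    headers={"Authorization": f"Bearer {HS_TOKEN}"},
)
```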
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 5dde6c769e..f50a279e98 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,21 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```

+# Upgrading to v1.90.0
+
+## App service query parameter authorization is now a configuration option
+
+Synapse v1.81.0 deprecated application service authorization via query parameters as this is
+considered insecure - and from Synapse v1.71.0 forwards the application service token has also been sent via
+[the `Authorization` header](https://spec.matrix.org/v1.6/application-service-api/#authorization), making the insecure
+query parameter authorization redundant. Since removing the ability to continue to use query parameters could break
+backwards compatibility, it has now been put behind a configuration option, `use_appservice_legacy_authorization`.
+This option defaults to false, but can be activated by adding
+```yaml
+use_appservice_legacy_authorization: true
+```
+to your configuration.
+
 # Upgrading to v1.89.0

 ## Removal of unspecced `user` property for `/register`
@@ -97,7 +112,6 @@ The standard `username` property should be used instead. See the
 [Application Service specification](https://spec.matrix.org/v1.7/application-service-api/#server-admin-style-permissions)
 for more information.

-
 # Upgrading to v1.88.0

 ## Minimum supported Python version
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index c32608da2b..2987c9332d 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2848,6 +2848,20 @@ Example configuration:
 ```yaml
 track_appservice_user_ips: true
 ```
+---
+### `use_appservice_legacy_authorization`
+
+Whether to send the application service access tokens via the `access_token` query parameter
+per older versions of the Matrix specification. Defaults to false. Set to true to enable sending
+access tokens via a query parameter.
+
+**Enabling this option is considered insecure and is not recommended.**
+
+Example configuration:
+```yaml
+use_appservice_legacy_authorization: true
+```
+
 ---
 ### `macaroon_secret_key`

diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 359999f680..de7a94bf26 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -16,7 +16,6 @@ import logging
 import urllib.parse
 from typing import (
     TYPE_CHECKING,
-    Any,
     Dict,
     Iterable,
     List,
@@ -25,6 +24,7 @@ from typing import (
     Sequence,
     Tuple,
     TypeVar,
+    Union,
 )

 from prometheus_client import Counter
@@ -119,6 +119,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.clock = hs.get_clock()
+        self.config = hs.config.appservice

         self.protocol_meta_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
@@ -132,9 +133,12 @@
         assert service.hs_token is not None

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -155,9 +159,12 @@
         assert service.hs_token is not None

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -190,10 +197,12 @@

         assert service.hs_token is not None
         try:
-            args: Mapping[Any, Any] = {
-                **fields,
-                b"access_token": service.hs_token,
-            }
+            args: Mapping[bytes, Union[List[bytes], str]] = fields
+            if self.config.use_appservice_legacy_authorization:
+                args = {
+                    **fields,
+                    b"access_token": service.hs_token,
+                }
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
                 args=args,
@@ -231,9 +240,12 @@
             # This is required by the configuration.
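             # (The token below is always sent as a Bearer token in the
             # Authorization header; it is only also copied into the
             # access_token query parameter when the legacy option is enabled.)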
             assert service.hs_token is not None
             try:
+                args = None
+                if self.config.use_appservice_legacy_authorization:
+                    args = {"access_token": service.hs_token}
                 info = await self.get_json(
                     f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}",
-                    {"access_token": service.hs_token},
+                    args,
                     headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )

@@ -344,10 +356,14 @@
         }

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
+
             await self.put_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}",
                 json_body=body,
-                args={"access_token": service.hs_token},
+                args=args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if logger.isEnabledFor(logging.DEBUG):
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index c2710fdf04..919f81a9b7 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -43,6 +43,14 @@ class AppServiceConfig(Config):
         )

         self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
+        self.use_appservice_legacy_authorization = config.get(
+            "use_appservice_legacy_authorization", False
+        )
+        if self.use_appservice_legacy_authorization:
+            logger.warning(
+                "The use of appservice legacy authorization via query params is deprecated"
+                " and should be considered insecure."
+            )


 def load_appservices(
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 807dc2f21c..3c635e3dcb 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Mapping, Sequence, Union
+from typing import Any, List, Mapping, Optional, Sequence, Union
 from unittest.mock import Mock

 from twisted.test.proto_helpers import MemoryReactor
@@ -22,6 +22,7 @@ from synapse.types import JsonDict
 from synapse.util import Clock

 from tests import unittest
+from tests.unittest import override_config

 PROTOCOL = "myproto"
 TOKEN = "myastoken"
@@ -39,7 +40,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             hs_token=TOKEN,
         )

-    def test_query_3pe_authenticates_token(self) -> None:
+    def test_query_3pe_authenticates_token_via_header(self) -> None:
         """
         Tests that 3pe queries to the appservice are authenticated
         with the appservice's token.
         """
@@ -74,12 +75,88 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             args: Mapping[Any, Any],
             headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> List[JsonDict]:
-            # Ensure the access token is passed as both a header and query arg.
-            if not headers.get("Authorization") or not args.get(b"access_token"):
+            # Ensure the access token is passed as a header.
+            if not headers or not headers.get("Authorization"):
                 raise RuntimeError("Access token not provided")
+            # ... and not as a query param.
+            if b"access_token" in args:
+                raise RuntimeError(
+                    "Access token should not be passed as a query param."
+                )

             self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
             self.request_url = url
             if url == URL_USER:
                 return SUCCESS_RESULT_USER
             elif url == URL_LOCATION:
                 return SUCCESS_RESULT_LOCATION
             else:
                 raise RuntimeError(
                     "URL provided was invalid. This should never be seen."
+ ) + + # We assign to a method, which mypy doesn't like. + self.api.get_json = Mock(side_effect=get_json) # type: ignore[assignment] + + result = self.get_success( + self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]}) + ) + self.assertEqual(self.request_url, URL_USER) + self.assertEqual(result, SUCCESS_RESULT_USER) + result = self.get_success( + self.api.query_3pe( + self.service, "location", PROTOCOL, {b"some": [b"field"]} + ) + ) + self.assertEqual(self.request_url, URL_LOCATION) + self.assertEqual(result, SUCCESS_RESULT_LOCATION) + + @override_config({"use_appservice_legacy_authorization": True}) + def test_query_3pe_authenticates_token_via_param(self) -> None: + """ + Tests that 3pe queries to the appservice are authenticated + with the appservice's token. + """ + + SUCCESS_RESULT_USER = [ + { + "protocol": PROTOCOL, + "userid": "@a:user", + "fields": { + "more": "fields", + }, + } + ] + SUCCESS_RESULT_LOCATION = [ + { + "protocol": PROTOCOL, + "alias": "#a:room", + "fields": { + "more": "fields", + }, + } + ] + + URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}" + URL_LOCATION = f"{URL}/_matrix/app/v1/thirdparty/location/{PROTOCOL}" + + self.request_url = None + + async def get_json( + url: str, + args: Mapping[Any, Any], + headers: Optional[ + Mapping[Union[str, bytes], Sequence[Union[str, bytes]]] + ] = None, + ) -> List[JsonDict]: + # Ensure the access token is passed as a both a query param and in the headers. + if not args.get(b"access_token"): + raise RuntimeError("Access token should be provided in query params.") + if not headers or not headers.get("Authorization"): + raise RuntimeError("Access token should be provided in auth headers.") + self.assertEqual(args.get(b"access_token"), TOKEN) + self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) self.request_url = url if url == URL_USER: return SUCCESS_RESULT_USER -- cgit 1.5.1 From 9d3713d6d512d30a42456c9af25a3ab1a8865406 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 7 Aug 2023 18:36:04 +0100 Subject: Add notes describing Synapse's streams (#16015) Co-authored-by: Patrick Cloke --- changelog.d/16015.doc | 1 + docs/SUMMARY.md | 1 + docs/development/synapse_architecture/streams.md | 157 +++++++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 changelog.d/16015.doc create mode 100644 docs/development/synapse_architecture/streams.md (limited to 'docs') diff --git a/changelog.d/16015.doc b/changelog.d/16015.doc new file mode 100644 index 0000000000..1113d00dc6 --- /dev/null +++ b/changelog.d/16015.doc @@ -0,0 +1 @@ +Add a internal documentation page describing the ["streams" used within Synapse](https://matrix-org.github.io/synapse/v1.90/development/synapse_architecture/streams.html). 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index a8e5ddad9d..31b3032029 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -97,6 +97,7 @@
     - [Cancellation](development/synapse_architecture/cancellation.md)
     - [Log Contexts](log_contexts.md)
     - [Replication](replication.md)
+    - [Streams](development/synapse_architecture/streams.md)
     - [TCP Replication](tcp_replication.md)
   - [Faster remote joins](development/synapse_architecture/faster_joins.md)
 - [Internal Documentation](development/internal_documentation/README.md)
diff --git a/docs/development/synapse_architecture/streams.md b/docs/development/synapse_architecture/streams.md
new file mode 100644
index 0000000000..bee0b8a8c0
--- /dev/null
+++ b/docs/development/synapse_architecture/streams.md
@@ -0,0 +1,157 @@
+## Streams
+
+Synapse has a concept of "streams", which are roughly described in [`id_generators.py`](
+    https://github.com/matrix-org/synapse/blob/develop/synapse/storage/util/id_generators.py
+).
+Generally speaking, streams are a series of notifications that something in Synapse's database has changed that the application might need to respond to.
+For example:
+
+- The events stream reports new events (PDUs) that Synapse creates, or that Synapse accepts from another homeserver.
+- The account data stream reports changes to users' [account data](https://spec.matrix.org/v1.7/client-server-api/#client-config).
+- The to-device stream reports when a device has a new [to-device message](https://spec.matrix.org/v1.7/client-server-api/#send-to-device-messaging).
+
+See [`synapse.replication.tcp.streams`](
+    https://github.com/matrix-org/synapse/blob/develop/synapse/replication/tcp/streams/__init__.py
+) for the full list of streams.
+
+It is very helpful to understand the streams mechanism when working on any part of Synapse that needs to respond to changes—especially if those changes are made by different workers.
+To that end, let's describe streams formally, paraphrasing from the docstring of [`AbstractStreamIdGenerator`](
+    https://github.com/matrix-org/synapse/blob/a719b703d9bd0dade2565ddcad0e2f3a7a9d4c37/synapse/storage/util/id_generators.py#L96
+).
+
+### Definition
+
+A stream is an append-only log `T1, T2, ..., Tn, ...` of facts[^1] which grows over time.
+Only "writers" can add facts to a stream, and there may be multiple writers.
+
+Each fact has an ID, called its "stream ID".
+Readers should only process facts in ascending stream ID order.
+
+Roughly speaking, each stream is backed by a database table.
+It should have a `stream_id` (or similar) bigint column holding stream IDs, plus additional columns as necessary to describe the fact.
+Typically, a fact is expressed with a single row in its backing table.[^2]
+Within a stream, no two facts may have the same `stream_id`.
+
+> _Aside_. Some additional notes on streams' backing tables.
+>
+> 1. Rich would like to [ditch the backing tables](https://github.com/matrix-org/synapse/issues/13456).
+> 2. The backing tables may have other uses.
+>    For example, the events table backs the events stream, and is read when processing new events.
+>    But old rows are read from the table all the time, whenever Synapse needs to look up some facts about an event.
+> 3. Rich suspects that sometimes the stream is backed by multiple tables, so the stream proper is the union of those tables.
+
+Stream writers can "reserve" a stream ID, and then later mark it as having been completed.
+Stream writers need to track the completion of each stream fact.
+In the happy case, completion means a fact has been written to the stream table.
+But unhappy cases (e.g. transaction rollback due to an error) also count as completion.
+Once completed, the rows written with that stream ID are fixed, and no new rows
+will be inserted with that ID.
+
+### Current stream ID
+
+For any given stream reader (including writers themselves), we may define a per-writer current stream ID:
+
+> The current stream ID _for a writer W_ is the largest stream ID such that
+> all transactions added by W with equal or smaller ID have completed.
+
+Similarly, there is a "linear" notion of current stream ID:
+
+> The "linear" current stream ID is the largest stream ID such that
+> all facts (added by any writer) with equal or smaller ID have completed.
+
+Because different stream readers A and B learn about new facts at different times, A and B may disagree about current stream IDs.
+Put differently: we should think of stream readers as being independent of each other, proceeding through a stream of facts at different rates.
+
+**NB.** For both senses of "current", note that if a writer opens a transaction that never completes, the current stream ID will never advance beyond that writer's last written stream ID.
+
+For single-writer streams, the per-writer current ID and the linear current ID are the same.
+Both senses of current ID are monotonic, but they may "skip" or jump over IDs because facts complete out of order.
+
+
+_Example_.
+Consider a single-writer stream which is initially at ID 1.
+
+| Action     | Current stream ID | Notes                                             |
+|------------|-------------------|---------------------------------------------------|
+|            | 1                 |                                                   |
+| Reserve 2  | 1                 |                                                   |
+| Reserve 3  | 1                 |                                                   |
+| Complete 3 | 1                 | current ID unchanged, waiting for 2 to complete   |
+| Complete 2 | 3                 | current ID jumps from 1 -> 3                      |
+| Reserve 4  | 3                 |                                                   |
+| Reserve 5  | 3                 |                                                   |
+| Reserve 6  | 3                 |                                                   |
+| Complete 5 | 3                 |                                                   |
+| Complete 4 | 5                 | current ID jumps 3 -> 5, even though 6 is pending |
+| Complete 6 | 6                 |                                                   |
+
+
+### Multi-writer streams
+
+There are two ways to view a multi-writer stream.
+
+1. Treat it as a collection of distinct single-writer streams, one
+   for each writer.
+2. Treat it as a single stream.
+
+The single stream (option 2) is conceptually simpler, and easier to represent (a single stream ID).
+However, it requires each reader to know about the entire set of writers, to ensure that readers don't erroneously advance their current stream position too early and miss a fact from an unknown writer.
+In contrast, multiple parallel streams (option 1) are more complex, requiring more state to represent (a map from writer to stream ID).
+The payoff for doing so is that readers can "peek" ahead to facts that completed on one writer no matter the state of the others, reducing latency.
+
+Note that a multi-writer stream can be viewed in both ways.
+For example, the events stream is treated as multiple single-writer streams (option 1) by the sync handler, so that events are sent to clients as soon as possible.
+But the background process that works through events treats them as a single linear stream.
+
+Another useful example is the cache invalidation stream.
+The facts this stream holds are instructions to "you should now invalidate these cache entries".
+We only ever treat this as multiple single-writer streams, as there is no important ordering between cache invalidations.
+(Invalidations are self-contained facts, and they commute/are idempotent.)
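+
+To make the definition of "current stream ID" concrete, here is a small Python
+sketch (illustrative only, not Synapse code) that replays the start of the
+single-writer example table above:
+
+```python
+def current_stream_id(start: int, completed: set, reserved_up_to: int) -> int:
+    """Largest stream ID such that every reserved ID <= it has completed."""
+    current = start
+    # Walk forward while the next reserved ID has already completed.
+    while current + 1 <= reserved_up_to and current + 1 in completed:
+        current += 1
+    return current
+
+# Stream starts at ID 1; the writer reserves IDs 2 and 3.
+completed: set = set()
+completed.add(3)
+assert current_stream_id(1, completed, 3) == 1  # still waiting for 2
+completed.add(2)
+assert current_stream_id(1, completed, 3) == 3  # current ID jumps 1 -> 3
+```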
+
+### Writing to streams
+
+Writers need to track:
+ - their current position (i.e. their own per-writer stream ID); and
+ - their facts currently awaiting completion.
+
+At startup,
+ - the current position of that writer can be found by querying the database (which suggests that facts need to be written to the database atomically, in a transaction); and
+ - there are no facts awaiting completion.
+
+To reserve a stream ID, call [`nextval`](https://www.postgresql.org/docs/current/functions-sequence.html) on the appropriate postgres sequence.
+
+To write a fact to the stream: insert the appropriate rows into the appropriate backing table.
+
+To complete a fact, first remove it from your map of facts currently awaiting completion.
+Then, if no earlier fact is awaiting completion, the writer can advance its current position in that stream.
+Upon doing so it should emit an `RDATA` message[^3], once for every fact between the old and the new stream ID.
+
+### Subscribing to streams
+
+Readers need to track the current position of every writer.
+
+At startup, they can find this by contacting each writer with a `REPLICATE` message,
+requesting that all writers reply describing their current position in their streams.
+Writers reply with a `POSITION` message.
+
+To learn about new facts, readers should listen for `RDATA` messages and process them to respond to the new fact.
+The `RDATA` itself is not a self-contained representation of the fact;
+readers will have to query the stream tables for the full details.
+Readers must also advance their record of the writer's current position for that stream.
+
+# Summary
+
+In a nutshell: we have an append-only log with a "buffer/scratchpad" at the end where we have to wait for the sequence to be linear and contiguous.
+
+
+---
+
+[^1]: we use the word _fact_ here for two reasons.
+Firstly, the word "event" is already heavily overloaded (PDUs, EDUs, account data, ...) and we don't need to make that worse.
+Secondly, "fact" emphasises that the things we append to a stream cannot change after the fact.
+
+[^2]: A fact might be expressed with 0 rows, e.g. if we opened a transaction to persist an event, but failed and rolled the transaction back before marking the fact as completed.
+In principle a fact might be expressed with 2 or more rows; if so, each of those rows should share the fact's stream ID.
+
+[^3]: This communication used to happen directly with the writers [over TCP](../../tcp_replication.md);
+nowadays it's done via Redis's Pubsub.
-- cgit 1.5.1