From e02f4b7de287f49476ee5b60e3e439eb8bb11047 Mon Sep 17 00:00:00 2001
From: Nils
Date: Mon, 31 Jul 2023 13:25:06 +0200
Subject: Do not expose Admin API in caddy reverse proxy example (#16027)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nils ANDRÉ-CHANG
---
 docs/reverse_proxy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
index 06337e7c00..fe9519b4b6 100644
--- a/docs/reverse_proxy.md
+++ b/docs/reverse_proxy.md
@@ -95,7 +95,7 @@ matrix.example.com {
 }

 example.com:8448 {
-    reverse_proxy localhost:8008
+    reverse_proxy /_matrix/* localhost:8008
 }
 ```
-- cgit 1.5.1

From f0a860908ba0309c89c9dba452d99b4f9c6928f7 Mon Sep 17 00:00:00 2001
From: Mathieu Velten
Date: Thu, 3 Aug 2023 20:36:55 +0200
Subject: Allow config of the backoff algorithm for the federation client.
 (#15754)

Adds three new configuration variables:

* destination_min_retry_interval is identical to before (10m).
* destination_retry_multiplier is now 2 instead of 5, so the maximum
  value is reached more slowly.
* destination_max_retry_interval is one week instead of (essentially)
  infinity. Capping this means destinations will continue to be retried
  occasionally instead of being lost forever. The previous value was
  2 ^ 62 milliseconds.
---
 changelog.d/15754.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 11 +++++++++
 synapse/config/federation.py                     | 18 +++++++++++++++
 synapse/util/retryutils.py                       | 29 +++++++++++++-----------
 tests/storage/test_transactions.py               |  9 ++++++--
 tests/util/test_retryutils.py                    | 22 +++++++++---------
 6 files changed, 64 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/15754.misc

(limited to 'docs')

diff --git a/changelog.d/15754.misc b/changelog.d/15754.misc
new file mode 100644
index 0000000000..4314d415a3
--- /dev/null
+++ b/changelog.d/15754.misc
@@ -0,0 +1 @@
+Allow for the configuration of the backoff algorithm for federation destinations.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 4e6fcd085a..c32608da2b 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1242,6 +1242,14 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Defaults to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Defaults to 10 attempts.

+The following options control the retry logic when communicating with a specific homeserver destination.
+Unlike the previous configuration options, these values apply across all requests
+for a given destination and the state of the backoff is stored in the database.
+
+* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
+* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent failure. Defaults to 2.
+* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week.
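+
+As an illustration (this sketch is not part of Synapse; the names and defaults
+are taken from the options above), the schedule these three options produce
+looks like the following, ignoring the random jitter the real implementation
+adds:
+
+```python
+def backoff_schedule(
+    min_interval_ms: int = 10 * 60 * 1000,           # destination_min_retry_interval
+    multiplier: int = 2,                             # destination_retry_multiplier
+    max_interval_ms: int = 7 * 24 * 60 * 60 * 1000,  # destination_max_retry_interval
+    attempts: int = 12,
+) -> list:
+    """Return the successive retry intervals, in milliseconds."""
+    intervals = []
+    interval = min_interval_ms
+    for _ in range(attempts):
+        intervals.append(interval)
+        # Each failure multiplies the interval, up to the configured cap.
+        interval = min(interval * multiplier, max_interval_ms)
+    return intervals
+
+# With the defaults this yields 10m, 20m, 40m, ... capped at one week.
+```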
+
 Example configuration:
 ```yaml
 federation:
@@ -1250,6 +1258,9 @@ federation:
   max_long_retry_delay: 100s
   max_short_retries: 5
   max_long_retries: 20
+  destination_min_retry_interval: 30s
+  destination_retry_multiplier: 5
+  destination_max_retry_interval: 12h
 ```
 ---
 ## Caching
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 0e1cb8b6e3..97636039b8 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -65,5 +65,23 @@ class FederationConfig(Config):
         self.max_long_retries = federation_config.get("max_long_retries", 10)
         self.max_short_retries = federation_config.get("max_short_retries", 3)

+        # Allow for the configuration of the backoff algorithm used
+        # when trying to reach an unavailable destination.
+        # Unlike the previous configuration options, these values apply
+        # across multiple requests, and the state of the backoff is
+        # stored in the database.
+        self.destination_min_retry_interval_ms = Config.parse_duration(
+            federation_config.get("destination_min_retry_interval", "10m")
+        )
+        self.destination_retry_multiplier = federation_config.get(
+            "destination_retry_multiplier", 2
+        )
+        self.destination_max_retry_interval_ms = min(
+            Config.parse_duration(
+                federation_config.get("destination_max_retry_interval", "7d")
+            ),
+            # Set a hard limit to avoid overflowing the database column.
+            2**62,
+        )


 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index dcc037b982..27e9fc976c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -27,15 +27,6 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-# the initial backoff, after the first transaction fails
-MIN_RETRY_INTERVAL = 10 * 60 * 1000
-
-# how much we multiply the backoff by after each subsequent fail
-RETRY_MULTIPLIER = 5
-
-# a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2**62
-

 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts: int, retry_interval: int, destination: str):
@@ -169,6 +160,16 @@ class RetryDestinationLimiter:
         self.notifier = notifier
         self.replication_client = replication_client

+        self.destination_min_retry_interval_ms = (
+            self.store.hs.config.federation.destination_min_retry_interval_ms
+        )
+        self.destination_retry_multiplier = (
+            self.store.hs.config.federation.destination_retry_multiplier
+        )
+        self.destination_max_retry_interval_ms = (
+            self.store.hs.config.federation.destination_max_retry_interval_ms
+        )
+
     def __enter__(self) -> None:
         pass

@@ -220,13 +221,15 @@ class RetryDestinationLimiter:
             # We couldn't connect.
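             # The update below implements jittered exponential backoff: each
             # failure multiplies the previous interval by the configured
             # multiplier and by a random factor between 0.8 and 1.4, and the
             # result is clamped to the configured maximum.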
             if self.retry_interval:
                 self.retry_interval = int(
-                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                    self.retry_interval
+                    * self.destination_retry_multiplier
+                    * random.uniform(0.8, 1.4)
                 )

-                if self.retry_interval >= MAX_RETRY_INTERVAL:
-                    self.retry_interval = MAX_RETRY_INTERVAL
+                if self.retry_interval >= self.destination_max_retry_interval_ms:
+                    self.retry_interval = self.destination_max_retry_interval_ms
             else:
-                self.retry_interval = MIN_RETRY_INTERVAL
+                self.retry_interval = self.destination_min_retry_interval_ms

             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 2fab84a529..ef06b50dbb 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -17,7 +17,6 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.server import HomeServer
 from synapse.storage.databases.main.transactions import DestinationRetryTimings
 from synapse.util import Clock
-from synapse.util.retryutils import MAX_RETRY_INTERVAL

 from tests.unittest import HomeserverTestCase

@@ -57,8 +56,14 @@ class TransactionStoreTestCase(HomeserverTestCase):
         self.get_success(d)

     def test_large_destination_retry(self) -> None:
+        max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
+        )
         d = self.store.set_destination_retry_timings(
-            "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
+            "example.com",
+            max_retry_interval_ms,
+            max_retry_interval_ms,
+            max_retry_interval_ms,
         )
         self.get_success(d)

diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 5f8f4e76b5..1277e1a865 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -11,12 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from synapse.util.retryutils import (
-    MIN_RETRY_INTERVAL,
-    RETRY_MULTIPLIER,
-    NotRetryingDestination,
-    get_retry_limiter,
-)
+from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter

 from tests.unittest import HomeserverTestCase

@@ -42,6 +37,11 @@ class RetryLimiterTestCase(HomeserverTestCase):

         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

+        min_retry_interval_ms = (
+            self.hs.config.federation.destination_min_retry_interval_ms
+        )
+        retry_multiplier = self.hs.config.federation.destination_retry_multiplier
+
         self.pump(1)
         try:
             with limiter:
@@ -57,7 +57,7 @@
         assert new_timings is not None
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, failure_ts)
-        self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL)
+        self.assertEqual(new_timings.retry_interval, min_retry_interval_ms)

         # now if we try again we should get a failure
         self.get_failure(
@@ -68,7 +68,7 @@
         # advance the clock and try again
         #

-        self.pump(MIN_RETRY_INTERVAL)
+        self.pump(min_retry_interval_ms)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

         self.pump(1)
@@ -87,16 +87,16 @@
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, retry_ts)
         self.assertGreaterEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5
         )
         self.assertLessEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0
         )

         #
         # one more go, with success
         #
-        self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0)
+        self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))

         self.pump(1)
-- cgit 1.5.1

From 0a5f4f766514b84aff84ff17dffd5301a437c797 Mon Sep 17 00:00:00 2001
From: Shay
Date: Thu, 3 Aug 2023 11:43:51 -0700
Subject: Move support for application service query parameter authorization
 behind a configuration option (#16017)

---
 changelog.d/16017.removal                        |  1 +
 docs/upgrade.md                                  | 16 ++++-
 docs/usage/configuration/config_documentation.md | 14 ++++
 synapse/appservice/api.py                        | 34 +++++++---
 synapse/config/appservice.py                     |  8 +++
 tests/appservice/test_api.py                     | 85 ++++++++++++++++++++++--
 6 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16017.removal

(limited to 'docs')

diff --git a/changelog.d/16017.removal b/changelog.d/16017.removal
new file mode 100644
index 0000000000..6b72442892
--- /dev/null
+++ b/changelog.d/16017.removal
@@ -0,0 +1 @@
+Move support for application service query parameter authorization behind a configuration option.
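For context, here is a minimal sketch (not part of this patch; the URL and
token below are placeholders) contrasting the two authorization styles this
option toggles:

```python
import requests

AS_URL = "https://appservice.example.com/_matrix/app/v1/users/@alice:example.com"
HS_TOKEN = "hs-token-placeholder"

# Spec-compliant style: the homeserver's token travels only in the
# Authorization header.
requests.get(AS_URL, headers={"Authorization": f"Bearer {HS_TOKEN}"})

# Legacy style (use_appservice_legacy_authorization: true): the token is
# additionally duplicated into the query string, where it is more likely to
# be captured in access logs and by intermediaries.
requests.get(
    AS_URL,
    params={"access_token": HS_TOKEN},
    headers={"Authorization": f"Bearer {HS_TOKEN}"},
)
```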
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 5dde6c769e..f50a279e98 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,21 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```

+# Upgrading to v1.90.0
+
+## App service query parameter authorization is now a configuration option
+
+Synapse v1.81.0 deprecated application service authorization via query parameters as this is
+considered insecure - and from Synapse v1.71.0 forwards the application service token has also been sent via
+[the `Authorization` header](https://spec.matrix.org/v1.6/application-service-api/#authorization), making the insecure
+query parameter authorization redundant. Since removing the ability to continue to use query parameters could break
+backwards compatibility, it has now been put behind a configuration option, `use_appservice_legacy_authorization`.
+This option defaults to false, but can be activated by adding
+```yaml
+use_appservice_legacy_authorization: true
+```
+to your configuration.
+
 # Upgrading to v1.89.0

 ## Removal of unspecced `user` property for `/register`
@@ -97,7 +112,6 @@ The standard `username` property should be used instead. See the
 [Application Service specification](https://spec.matrix.org/v1.7/application-service-api/#server-admin-style-permissions)
 for more information.

-
 # Upgrading to v1.88.0

 ## Minimum supported Python version
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index c32608da2b..2987c9332d 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2848,6 +2848,20 @@ Example configuration:
 ```yaml
 track_appservice_user_ips: true
 ```
+---
+### `use_appservice_legacy_authorization`
+
+Whether to send the application service access tokens via the `access_token` query parameter
+per older versions of the Matrix specification. Defaults to false. Set to true to enable sending
+access tokens via a query parameter.
+
+**Enabling this option is considered insecure and is not recommended.**
+
+Example configuration:
+```yaml
+use_appservice_legacy_authorization: true
+```
+
 ---
 ### `macaroon_secret_key`

diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 359999f680..de7a94bf26 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -16,7 +16,6 @@ import logging
 import urllib.parse
 from typing import (
     TYPE_CHECKING,
-    Any,
     Dict,
     Iterable,
     List,
@@ -25,6 +24,7 @@ from typing import (
     Sequence,
     Tuple,
     TypeVar,
+    Union,
 )

 from prometheus_client import Counter
@@ -119,6 +119,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.clock = hs.get_clock()
+        self.config = hs.config.appservice

         self.protocol_meta_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
@@ -132,9 +133,12 @@
         assert service.hs_token is not None

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -155,9 +159,12 @@
         assert service.hs_token is not None

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -190,10 +197,12 @@

         assert service.hs_token is not None
         try:
-            args: Mapping[Any, Any] = {
-                **fields,
-                b"access_token": service.hs_token,
-            }
+            args: Mapping[bytes, Union[List[bytes], str]] = fields
+            if self.config.use_appservice_legacy_authorization:
+                args = {
+                    **fields,
+                    b"access_token": service.hs_token,
+                }
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
                 args=args,
@@ -231,9 +240,12 @@
             # This is required by the configuration.
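             # (The token below is always sent as a Bearer token in the
             # Authorization header; it is only also copied into the
             # access_token query parameter when the legacy option is enabled.)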
             assert service.hs_token is not None
             try:
+                args = None
+                if self.config.use_appservice_legacy_authorization:
+                    args = {"access_token": service.hs_token}
                 info = await self.get_json(
                     f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}",
-                    {"access_token": service.hs_token},
+                    args,
                     headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )

@@ -344,10 +356,14 @@
         }

         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
+
             await self.put_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}",
                 json_body=body,
-                args={"access_token": service.hs_token},
+                args=args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if logger.isEnabledFor(logging.DEBUG):
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index c2710fdf04..919f81a9b7 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -43,6 +43,14 @@ class AppServiceConfig(Config):
         )

         self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
+        self.use_appservice_legacy_authorization = config.get(
+            "use_appservice_legacy_authorization", False
+        )
+        if self.use_appservice_legacy_authorization:
+            logger.warning(
+                "The use of appservice legacy authorization via query params is deprecated"
+                " and should be considered insecure."
+            )


 def load_appservices(
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 807dc2f21c..3c635e3dcb 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Mapping, Sequence, Union
+from typing import Any, List, Mapping, Optional, Sequence, Union
 from unittest.mock import Mock

 from twisted.test.proto_helpers import MemoryReactor
@@ -22,6 +22,7 @@ from synapse.types import JsonDict
 from synapse.util import Clock

 from tests import unittest
+from tests.unittest import override_config

 PROTOCOL = "myproto"
 TOKEN = "myastoken"
@@ -39,7 +40,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             hs_token=TOKEN,
         )

-    def test_query_3pe_authenticates_token(self) -> None:
+    def test_query_3pe_authenticates_token_via_header(self) -> None:
         """
         Tests that 3pe queries to the appservice are authenticated
         with the appservice's token.
         """
@@ -74,12 +75,88 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             args: Mapping[Any, Any],
             headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> List[JsonDict]:
-            # Ensure the access token is passed as both a header and query arg.
-            if not headers.get("Authorization") or not args.get(b"access_token"):
+            # Ensure the access token is passed as a header.
+            if not headers or not headers.get("Authorization"):
                 raise RuntimeError("Access token not provided")
+            # ... and not as a query param.
+            if b"access_token" in args:
+                raise RuntimeError(
+                    "Access token should not be passed as a query param."
+                )

             self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
             self.request_url = url
             if url == URL_USER:
                 return SUCCESS_RESULT_USER
             elif url == URL_LOCATION:
                 return SUCCESS_RESULT_LOCATION
             else:
                 raise RuntimeError(
                     "URL provided was invalid. This should never be seen."
+ ) + + # We assign to a method, which mypy doesn't like. + self.api.get_json = Mock(side_effect=get_json) # type: ignore[assignment] + + result = self.get_success( + self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]}) + ) + self.assertEqual(self.request_url, URL_USER) + self.assertEqual(result, SUCCESS_RESULT_USER) + result = self.get_success( + self.api.query_3pe( + self.service, "location", PROTOCOL, {b"some": [b"field"]} + ) + ) + self.assertEqual(self.request_url, URL_LOCATION) + self.assertEqual(result, SUCCESS_RESULT_LOCATION) + + @override_config({"use_appservice_legacy_authorization": True}) + def test_query_3pe_authenticates_token_via_param(self) -> None: + """ + Tests that 3pe queries to the appservice are authenticated + with the appservice's token. + """ + + SUCCESS_RESULT_USER = [ + { + "protocol": PROTOCOL, + "userid": "@a:user", + "fields": { + "more": "fields", + }, + } + ] + SUCCESS_RESULT_LOCATION = [ + { + "protocol": PROTOCOL, + "alias": "#a:room", + "fields": { + "more": "fields", + }, + } + ] + + URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}" + URL_LOCATION = f"{URL}/_matrix/app/v1/thirdparty/location/{PROTOCOL}" + + self.request_url = None + + async def get_json( + url: str, + args: Mapping[Any, Any], + headers: Optional[ + Mapping[Union[str, bytes], Sequence[Union[str, bytes]]] + ] = None, + ) -> List[JsonDict]: + # Ensure the access token is passed as a both a query param and in the headers. + if not args.get(b"access_token"): + raise RuntimeError("Access token should be provided in query params.") + if not headers or not headers.get("Authorization"): + raise RuntimeError("Access token should be provided in auth headers.") + self.assertEqual(args.get(b"access_token"), TOKEN) + self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) self.request_url = url if url == URL_USER: return SUCCESS_RESULT_USER -- cgit 1.5.1 From 9d3713d6d512d30a42456c9af25a3ab1a8865406 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 7 Aug 2023 18:36:04 +0100 Subject: Add notes describing Synapse's streams (#16015) Co-authored-by: Patrick Cloke --- changelog.d/16015.doc | 1 + docs/SUMMARY.md | 1 + docs/development/synapse_architecture/streams.md | 157 +++++++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 changelog.d/16015.doc create mode 100644 docs/development/synapse_architecture/streams.md (limited to 'docs') diff --git a/changelog.d/16015.doc b/changelog.d/16015.doc new file mode 100644 index 0000000000..1113d00dc6 --- /dev/null +++ b/changelog.d/16015.doc @@ -0,0 +1 @@ +Add a internal documentation page describing the ["streams" used within Synapse](https://matrix-org.github.io/synapse/v1.90/development/synapse_architecture/streams.html). 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index a8e5ddad9d..31b3032029 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -97,6 +97,7 @@
     - [Cancellation](development/synapse_architecture/cancellation.md)
     - [Log Contexts](log_contexts.md)
     - [Replication](replication.md)
+    - [Streams](development/synapse_architecture/streams.md)
     - [TCP Replication](tcp_replication.md)
   - [Faster remote joins](development/synapse_architecture/faster_joins.md)
 - [Internal Documentation](development/internal_documentation/README.md)
diff --git a/docs/development/synapse_architecture/streams.md b/docs/development/synapse_architecture/streams.md
new file mode 100644
index 0000000000..bee0b8a8c0
--- /dev/null
+++ b/docs/development/synapse_architecture/streams.md
@@ -0,0 +1,157 @@
+## Streams
+
+Synapse has a concept of "streams", which are roughly described in [`id_generators.py`](
+    https://github.com/matrix-org/synapse/blob/develop/synapse/storage/util/id_generators.py
+).
+Generally speaking, streams are a series of notifications that something in Synapse's database has changed that the application might need to respond to.
+For example:
+
+- The events stream reports new events (PDUs) that Synapse creates, or that Synapse accepts from another homeserver.
+- The account data stream reports changes to users' [account data](https://spec.matrix.org/v1.7/client-server-api/#client-config).
+- The to-device stream reports when a device has a new [to-device message](https://spec.matrix.org/v1.7/client-server-api/#send-to-device-messaging).
+
+See [`synapse.replication.tcp.streams`](
+    https://github.com/matrix-org/synapse/blob/develop/synapse/replication/tcp/streams/__init__.py
+) for the full list of streams.
+
+It is very helpful to understand the streams mechanism when working on any part of Synapse that needs to respond to changes—especially if those changes are made by different workers.
+To that end, let's describe streams formally, paraphrasing from the docstring of [`AbstractStreamIdGenerator`](
+    https://github.com/matrix-org/synapse/blob/a719b703d9bd0dade2565ddcad0e2f3a7a9d4c37/synapse/storage/util/id_generators.py#L96
+).
+
+### Definition
+
+A stream is an append-only log `T1, T2, ..., Tn, ...` of facts[^1] which grows over time.
+Only "writers" can add facts to a stream, and there may be multiple writers.
+
+Each fact has an ID, called its "stream ID".
+Readers should only process facts in ascending stream ID order.
+
+Roughly speaking, each stream is backed by a database table.
+It should have a `stream_id` (or similar) bigint column holding stream IDs, plus additional columns as necessary to describe the fact.
+Typically, a fact is expressed with a single row in its backing table.[^2]
+Within a stream, no two facts may have the same `stream_id`.
+
+> _Aside_. Some additional notes on streams' backing tables.
+>
+> 1. Rich would like to [ditch the backing tables](https://github.com/matrix-org/synapse/issues/13456).
+> 2. The backing tables may have other uses.
+>    For example, the events table backs the events stream, and is read when processing new events.
+>    But old rows are read from the table all the time, whenever Synapse needs to look up some facts about an event.
+> 3. Rich suspects that sometimes the stream is backed by multiple tables, so the stream proper is the union of those tables.
+
+Stream writers can "reserve" a stream ID, and then later mark it as having been completed.
+Stream writers need to track the completion of each stream fact.
+In the happy case, completion means a fact has been written to the stream table.
+But unhappy cases (e.g. transaction rollback due to an error) also count as completion.
+Once completed, the rows written with that stream ID are fixed, and no new rows
+will be inserted with that ID.
+
+### Current stream ID
+
+For any given stream reader (including writers themselves), we may define a per-writer current stream ID:
+
+> The current stream ID _for a writer W_ is the largest stream ID such that
+> all transactions added by W with equal or smaller ID have completed.
+
+Similarly, there is a "linear" notion of current stream ID:
+
+> The "linear" current stream ID is the largest stream ID such that
+> all facts (added by any writer) with equal or smaller ID have completed.
+
+Because different stream readers A and B learn about new facts at different times, A and B may disagree about current stream IDs.
+Put differently: we should think of stream readers as being independent of each other, proceeding through a stream of facts at different rates.
+
+**NB.** For both senses of "current", note that if a writer opens a transaction that never completes, the current stream ID will never advance beyond that writer's last written stream ID.
+
+For single-writer streams, the per-writer current ID and the linear current ID are the same.
+Both senses of current ID are monotonic, but they may "skip" or jump over IDs because facts complete out of order.
+
+
+_Example_.
+Consider a single-writer stream which is initially at ID 1.
+
+| Action     | Current stream ID | Notes                                             |
+|------------|-------------------|---------------------------------------------------|
+|            | 1                 |                                                   |
+| Reserve 2  | 1                 |                                                   |
+| Reserve 3  | 1                 |                                                   |
+| Complete 3 | 1                 | current ID unchanged, waiting for 2 to complete   |
+| Complete 2 | 3                 | current ID jumps from 1 -> 3                      |
+| Reserve 4  | 3                 |                                                   |
+| Reserve 5  | 3                 |                                                   |
+| Reserve 6  | 3                 |                                                   |
+| Complete 5 | 3                 |                                                   |
+| Complete 4 | 5                 | current ID jumps 3 -> 5, even though 6 is pending |
+| Complete 6 | 6                 |                                                   |
+
+
+### Multi-writer streams
+
+There are two ways to view a multi-writer stream.
+
+1. Treat it as a collection of distinct single-writer streams, one
+   for each writer.
+2. Treat it as a single stream.
+
+The single stream (option 2) is conceptually simpler, and easier to represent (a single stream ID).
+However, it requires each reader to know about the entire set of writers, to ensure that readers don't erroneously advance their current stream position too early and miss a fact from an unknown writer.
+In contrast, multiple parallel streams (option 1) are more complex, requiring more state to represent (a map from writer to stream ID).
+The payoff for doing so is that readers can "peek" ahead to facts that completed on one writer no matter the state of the others, reducing latency.
+
+Note that a multi-writer stream can be viewed in both ways.
+For example, the events stream is treated as multiple single-writer streams (option 1) by the sync handler, so that events are sent to clients as soon as possible.
+But the background process that works through events treats them as a single linear stream.
+
+Another useful example is the cache invalidation stream.
+The facts this stream holds are instructions to "you should now invalidate these cache entries".
+We only ever treat this as multiple single-writer streams, as there is no important ordering between cache invalidations.
+(Invalidations are self-contained facts, and they commute/are idempotent.)
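+
+To make the definition of "current stream ID" concrete, here is a small Python
+sketch (illustrative only, not Synapse code) that replays the start of the
+single-writer example table above:
+
+```python
+def current_stream_id(start: int, completed: set, reserved_up_to: int) -> int:
+    """Largest stream ID such that every reserved ID <= it has completed."""
+    current = start
+    # Walk forward while the next reserved ID has already completed.
+    while current + 1 <= reserved_up_to and current + 1 in completed:
+        current += 1
+    return current
+
+# Stream starts at ID 1; the writer reserves IDs 2 and 3.
+completed: set = set()
+completed.add(3)
+assert current_stream_id(1, completed, 3) == 1  # still waiting for 2
+completed.add(2)
+assert current_stream_id(1, completed, 3) == 3  # current ID jumps 1 -> 3
+```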
+
+### Writing to streams
+
+Writers need to track:
+ - their current position (i.e. their own per-writer stream ID); and
+ - their facts currently awaiting completion.
+
+At startup,
+ - the current position of that writer can be found by querying the database (which suggests that facts need to be written to the database atomically, in a transaction); and
+ - there are no facts awaiting completion.
+
+To reserve a stream ID, call [`nextval`](https://www.postgresql.org/docs/current/functions-sequence.html) on the appropriate postgres sequence.
+
+To write a fact to the stream: insert the appropriate rows into the appropriate backing table.
+
+To complete a fact, first remove it from your map of facts currently awaiting completion.
+Then, if no earlier fact is awaiting completion, the writer can advance its current position in that stream.
+Upon doing so it should emit an `RDATA` message[^3], once for every fact between the old and the new stream ID.
+
+### Subscribing to streams
+
+Readers need to track the current position of every writer.
+
+At startup, they can find this by contacting each writer with a `REPLICATE` message,
+requesting that all writers reply describing their current position in their streams.
+Writers reply with a `POSITION` message.
+
+To learn about new facts, readers should listen for `RDATA` messages and process them to respond to the new fact.
+The `RDATA` itself is not a self-contained representation of the fact;
+readers will have to query the stream tables for the full details.
+Readers must also advance their record of the writer's current position for that stream.
+
+# Summary
+
+In a nutshell: we have an append-only log with a "buffer/scratchpad" at the end where we have to wait for the sequence to be linear and contiguous.
+
+
+---
+
+[^1]: we use the word _fact_ here for two reasons.
+Firstly, the word "event" is already heavily overloaded (PDUs, EDUs, account data, ...) and we don't need to make that worse.
+Secondly, "fact" emphasises that the things we append to a stream cannot change after the fact.
+
+[^2]: A fact might be expressed with 0 rows, e.g. if we opened a transaction to persist an event, but failed and rolled the transaction back before marking the fact as completed.
+In principle a fact might be expressed with 2 or more rows; if so, each of those rows should share the fact's stream ID.
+
+[^3]: This communication used to happen directly with the writers [over TCP](../../tcp_replication.md);
+nowadays it's done via Redis's Pubsub.
-- cgit 1.5.1