From 7564b8e118aa764fd0075f0d69910a5b2cd58182 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 29 Nov 2021 15:37:56 +0000 Subject: Update the media repository documentation (#11415) --- docs/media_repository.md | 89 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 19 deletions(-) (limited to 'docs') diff --git a/docs/media_repository.md b/docs/media_repository.md index 99ee8f1ef7..ba17f8a856 100644 --- a/docs/media_repository.md +++ b/docs/media_repository.md @@ -2,29 +2,80 @@ *Synapse implementation-specific details for the media repository* -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. +The media repository + * stores avatars, attachments and their thumbnails for media uploaded by local + users. + * caches avatars, attachments and their thumbnails for media uploaded by remote + users. + * caches resources and thumbnails used for + [URL previews](development/url_previews.md). -## Storage +All media in Matrix can be identified by a unique +[MXC URI](https://spec.matrix.org/latest/client-server-api/#matrix-content-mxc-uris), +consisting of a server name and media ID: +``` +mxc:/// +``` -Each item of media is assigned a `media_id` when it is uploaded. -The `media_id` is a randomly chosen, URL safe 24 character string. +## Local Media +Synapse generates 24 character media IDs for content uploaded by local users. +These media IDs consist of upper and lowercase letters and are case-sensitive. +Other homeserver implementations may generate media IDs differently. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by `media_id`. +Local media is recorded in the `local_media_repository` table, which includes +metadata such as MIME types, upload times and file sizes. +Note that this table is shared by the URL cache, which has a different media ID +scheme. -Content is stored on the filesystem under a `"local_content"` directory. +### Paths +A file with media ID `aabbcccccccccccccccccccc` and its `128x96` `image/jpeg` +thumbnail, created by scaling, would be stored at: +``` +local_content/aa/bb/cccccccccccccccccccc +local_thumbnails/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` -Thumbnails are stored under a `"local_thumbnails"` directory. +## Remote Media +When media from a remote homeserver is requested from Synapse, it is assigned +a local `filesystem_id`, with the same format as locally-generated media IDs, +as described above. -The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under -`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width -`128` and height `96` and type `"image/jpeg"` is stored under -`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"` +A record of remote media is stored in the `remote_media_cache` table, which +can be used to map remote MXC URIs (server names and media IDs) to local +`filesystem_id`s. -Remote content is cached under `"remote_content"` directory. Each item of -remote content is assigned a local `"filesystem_id"` to ensure that the -directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"` -is appropriate. Thumbnails for remote content are stored under -`"remote_thumbnail/server_name/..."` +### Paths +A file from `matrix.org` with `filesystem_id` `aabbcccccccccccccccccccc` and its +`128x96` `image/jpeg` thumbnail, created by scaling, would be stored at: +``` +remote_content/matrix.org/aa/bb/cccccccccccccccccccc +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg-scale +``` +Older thumbnails may omit the thumbnailing method: +``` +remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg +``` + +Note that `remote_thumbnail/` does not have an `s`. + +## URL Previews +See [URL Previews](development/url_previews.md) for documentation on the URL preview +process. + +When generating previews for URLs, Synapse may download and cache various +resources, including images. These resources are assigned temporary media IDs +of the form `yyyy-mm-dd_aaaaaaaaaaaaaaaa`, where `yyyy-mm-dd` is the current +date and `aaaaaaaaaaaaaaaa` is a random sequence of 16 case-sensitive letters. + +The metadata for these cached resources is stored in the +`local_media_repository` and `local_media_repository_url_cache` tables. + +Resources for URL previews are deleted after a few days. + +### Paths +The file with media ID `yyyy-mm-dd_aaaaaaaaaaaaaaaa` and its `128x96` +`image/jpeg` thumbnail, created by scaling, would be stored at: +``` +url_cache/yyyy-mm-dd/aaaaaaaaaaaaaaaa +url_cache_thumbnails/yyyy-mm-dd/aaaaaaaaaaaaaaaa/128-96-image-jpeg-scale +``` -- cgit 1.5.1 From d08ef6f155971717d2c6dbd78c89312afd4d84fa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 29 Nov 2021 16:57:06 +0000 Subject: Make background updates controllable via a plugin (#11306) Co-authored-by: Brendan Abolivier --- changelog.d/11306.feature | 1 + .../background_update_controller_callbacks.md | 71 ++++++++ docs/modules/writing_a_module.md | 12 +- setup.py | 4 +- synapse/module_api/__init__.py | 54 +++++- synapse/storage/background_updates.py | 192 ++++++++++++++++++--- tests/push/test_email.py | 9 +- tests/rest/admin/test_background_updates.py | 2 +- tests/storage/test_background_update.py | 104 +++++++++-- tests/storage/test_event_chain.py | 4 +- tests/storage/test_user_directory.py | 5 +- tests/unittest.py | 10 +- 12 files changed, 407 insertions(+), 61 deletions(-) create mode 100644 changelog.d/11306.feature create mode 100644 docs/modules/background_update_controller_callbacks.md (limited to 'docs') diff --git a/changelog.d/11306.feature b/changelog.d/11306.feature new file mode 100644 index 0000000000..aba3292015 --- /dev/null +++ b/changelog.d/11306.feature @@ -0,0 +1 @@ +Add plugin support for controlling database background updates. diff --git a/docs/modules/background_update_controller_callbacks.md b/docs/modules/background_update_controller_callbacks.md new file mode 100644 index 0000000000..b3e7c259f4 --- /dev/null +++ b/docs/modules/background_update_controller_callbacks.md @@ -0,0 +1,71 @@ +# Background update controller callbacks + +Background update controller callbacks allow module developers to control (e.g. rate-limit) +how database background updates are run. A database background update is an operation +Synapse runs on its database in the background after it starts. It's usually used to run +database operations that would take too long if they were run at the same time as schema +updates (which are run on startup) and delay Synapse's startup too much: populating a +table with a big amount of data, adding an index on a big table, deleting superfluous data, +etc. + +Background update controller callbacks can be registered using the module API's +`register_background_update_controller_callbacks` method. Only the first module (in order +of appearance in Synapse's configuration file) calling this method can register background +update controller callbacks, subsequent calls are ignored. + +The available background update controller callbacks are: + +### `on_update` + +_First introduced in Synapse v1.49.0_ + +```python +def on_update(update_name: str, database_name: str, one_shot: bool) -> AsyncContextManager[int] +``` + +Called when about to do an iteration of a background update. The module is given the name +of the update, the name of the database, and a flag to indicate whether the background +update will happen in one go and may take a long time (e.g. creating indices). If this last +argument is set to `False`, the update will be run in batches. + +The module must return an async context manager. It will be entered before Synapse runs a +background update; this should return the desired duration of the iteration, in +milliseconds. + +The context manager will be exited when the iteration completes. Note that the duration +returned by the context manager is a target, and an iteration may take substantially longer +or shorter. If the `one_shot` flag is set to `True`, the duration returned is ignored. + +__Note__: Unlike most module callbacks in Synapse, this one is _synchronous_. This is +because asynchronous operations are expected to be run by the async context manager. + +This callback is required when registering any other background update controller callback. + +### `default_batch_size` + +_First introduced in Synapse v1.49.0_ + +```python +async def default_batch_size(update_name: str, database_name: str) -> int +``` + +Called before the first iteration of a background update, with the name of the update and +of the database. The module must return the number of elements to process in this first +iteration. + +If this callback is not defined, Synapse will use a default value of 100. + +### `min_batch_size` + +_First introduced in Synapse v1.49.0_ + +```python +async def min_batch_size(update_name: str, database_name: str) -> int +``` + +Called before running a new batch for a background update, with the name of the update and +of the database. The module must return an integer representing the minimum number of +elements to process in this iteration. This number must be at least 1, and is used to +ensure that progress is always made. + +If this callback is not defined, Synapse will use a default value of 100. diff --git a/docs/modules/writing_a_module.md b/docs/modules/writing_a_module.md index 7764e06692..e7c0ffad58 100644 --- a/docs/modules/writing_a_module.md +++ b/docs/modules/writing_a_module.md @@ -71,15 +71,15 @@ Modules **must** register their web resources in their `__init__` method. ## Registering a callback Modules can use Synapse's module API to register callbacks. Callbacks are functions that -Synapse will call when performing specific actions. Callbacks must be asynchronous, and -are split in categories. A single module may implement callbacks from multiple categories, -and is under no obligation to implement all callbacks from the categories it registers -callbacks for. +Synapse will call when performing specific actions. Callbacks must be asynchronous (unless +specified otherwise), and are split in categories. A single module may implement callbacks +from multiple categories, and is under no obligation to implement all callbacks from the +categories it registers callbacks for. Modules can register callbacks using one of the module API's `register_[...]_callbacks` methods. The callback functions are passed to these methods as keyword arguments, with -the callback name as the argument name and the function as its value. This is demonstrated -in the example below. A `register_[...]_callbacks` method exists for each category. +the callback name as the argument name and the function as its value. A +`register_[...]_callbacks` method exists for each category. Callbacks for each category can be found on their respective page of the [Synapse documentation website](https://matrix-org.github.io/synapse). \ No newline at end of file diff --git a/setup.py b/setup.py index 0ce8beb004..ad99b3bd2c 100755 --- a/setup.py +++ b/setup.py @@ -119,7 +119,9 @@ CONDITIONAL_REQUIREMENTS["mypy"] = [ # Tests assume that all optional dependencies are installed. # # parameterized_class decorator was introduced in parameterized 0.7.0 -CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0"] +# +# We use `mock` library as that backports `AsyncMock` to Python 3.6 +CONDITIONAL_REQUIREMENTS["test"] = ["parameterized>=0.7.0", "mock>=4.0.0"] CONDITIONAL_REQUIREMENTS["dev"] = ( CONDITIONAL_REQUIREMENTS["lint"] diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 19e570ede2..a8154168be 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -82,10 +82,19 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.context import ( + defer_to_thread, + make_deferred_yieldable, + run_in_background, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.client.login import LoginResponse from synapse.storage import DataStore +from synapse.storage.background_updates import ( + DEFAULT_BATCH_SIZE_CALLBACK, + MIN_BATCH_SIZE_CALLBACK, + ON_UPDATE_CALLBACK, +) from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter @@ -311,6 +320,24 @@ class ModuleApi: auth_checkers=auth_checkers, ) + def register_background_update_controller_callbacks( + self, + on_update: ON_UPDATE_CALLBACK, + default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + min_batch_size: Optional[MIN_BATCH_SIZE_CALLBACK] = None, + ) -> None: + """Registers background update controller callbacks. + + Added in Synapse v1.49.0. + """ + + for db in self._hs.get_datastores().databases: + db.updates.register_update_controller_callbacks( + on_update=on_update, + default_batch_size=default_batch_size, + min_batch_size=min_batch_size, + ) + def register_web_resource(self, path: str, resource: Resource) -> None: """Registers a web resource to be served at the given path. @@ -995,6 +1022,11 @@ class ModuleApi: f, ) + async def sleep(self, seconds: float) -> None: + """Sleeps for the given number of seconds.""" + + await self._clock.sleep(seconds) + async def send_mail( self, recipient: str, @@ -1149,6 +1181,26 @@ class ModuleApi: return {key: state_events[event_id] for key, event_id in state_ids.items()} + async def defer_to_thread( + self, + f: Callable[..., T], + *args: Any, + **kwargs: Any, + ) -> T: + """Runs the given function in a separate thread from Synapse's thread pool. + + Added in Synapse v1.49.0. + + Args: + f: The function to run. + args: The function's arguments. + kwargs: The function's keyword arguments. + + Returns: + The return value of the function once ran in a thread. + """ + return await defer_to_thread(self._hs.get_reactor(), f, *args, **kwargs) + class PublicRoomListManager: """Contains methods for adding to, removing from and querying whether a room diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index bc8364400d..d64910aded 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -12,12 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Iterable, Optional +from typing import ( + TYPE_CHECKING, + AsyncContextManager, + Awaitable, + Callable, + Dict, + Iterable, + Optional, +) + +import attr from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.types import Connection from synapse.types import JsonDict -from synapse.util import json_encoder +from synapse.util import Clock, json_encoder from . import engines @@ -28,6 +38,45 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +ON_UPDATE_CALLBACK = Callable[[str, str, bool], AsyncContextManager[int]] +DEFAULT_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]] +MIN_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]] + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _BackgroundUpdateHandler: + """A handler for a given background update. + + Attributes: + callback: The function to call to make progress on the background + update. + oneshot: Wether the update is likely to happen all in one go, ignoring + the supplied target duration, e.g. index creation. This is used by + the update controller to help correctly schedule the update. + """ + + callback: Callable[[JsonDict, int], Awaitable[int]] + oneshot: bool = False + + +class _BackgroundUpdateContextManager: + BACKGROUND_UPDATE_INTERVAL_MS = 1000 + BACKGROUND_UPDATE_DURATION_MS = 100 + + def __init__(self, sleep: bool, clock: Clock): + self._sleep = sleep + self._clock = clock + + async def __aenter__(self) -> int: + if self._sleep: + await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000) + + return self.BACKGROUND_UPDATE_DURATION_MS + + async def __aexit__(self, *exc) -> None: + pass + + class BackgroundUpdatePerformance: """Tracks the how long a background update is taking to update its items""" @@ -84,20 +133,22 @@ class BackgroundUpdater: MINIMUM_BACKGROUND_BATCH_SIZE = 1 DEFAULT_BACKGROUND_BATCH_SIZE = 100 - BACKGROUND_UPDATE_INTERVAL_MS = 1000 - BACKGROUND_UPDATE_DURATION_MS = 100 def __init__(self, hs: "HomeServer", database: "DatabasePool"): self._clock = hs.get_clock() self.db_pool = database + self._database_name = database.name() + # if a background update is currently running, its name. self._current_background_update: Optional[str] = None + self._on_update_callback: Optional[ON_UPDATE_CALLBACK] = None + self._default_batch_size_callback: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None + self._min_batch_size_callback: Optional[MIN_BATCH_SIZE_CALLBACK] = None + self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {} - self._background_update_handlers: Dict[ - str, Callable[[JsonDict, int], Awaitable[int]] - ] = {} + self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {} self._all_done = False # Whether we're currently running updates @@ -107,6 +158,83 @@ class BackgroundUpdater: # enable/disable background updates via the admin API. self.enabled = True + def register_update_controller_callbacks( + self, + on_update: ON_UPDATE_CALLBACK, + default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + min_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None, + ) -> None: + """Register callbacks from a module for each hook.""" + if self._on_update_callback is not None: + logger.warning( + "More than one module tried to register callbacks for controlling" + " background updates. Only the callbacks registered by the first module" + " (in order of appearance in Synapse's configuration file) that tried to" + " do so will be called." + ) + + return + + self._on_update_callback = on_update + + if default_batch_size is not None: + self._default_batch_size_callback = default_batch_size + + if min_batch_size is not None: + self._min_batch_size_callback = min_batch_size + + def _get_context_manager_for_update( + self, + sleep: bool, + update_name: str, + database_name: str, + oneshot: bool, + ) -> AsyncContextManager[int]: + """Get a context manager to run a background update with. + + If a module has registered a `update_handler` callback, use the context manager + it returns. + + Otherwise, returns a context manager that will return a default value, optionally + sleeping if needed. + + Args: + sleep: Whether we can sleep between updates. + update_name: The name of the update. + database_name: The name of the database the update is being run on. + oneshot: Whether the update will complete all in one go, e.g. index creation. + In such cases the returned target duration is ignored. + + Returns: + The target duration in milliseconds that the background update should run for. + + Note: this is a *target*, and an iteration may take substantially longer or + shorter. + """ + if self._on_update_callback is not None: + return self._on_update_callback(update_name, database_name, oneshot) + + return _BackgroundUpdateContextManager(sleep, self._clock) + + async def _default_batch_size(self, update_name: str, database_name: str) -> int: + """The batch size to use for the first iteration of a new background + update. + """ + if self._default_batch_size_callback is not None: + return await self._default_batch_size_callback(update_name, database_name) + + return self.DEFAULT_BACKGROUND_BATCH_SIZE + + async def _min_batch_size(self, update_name: str, database_name: str) -> int: + """A lower bound on the batch size of a new background update. + + Used to ensure that progress is always made. Must be greater than 0. + """ + if self._min_batch_size_callback is not None: + return await self._min_batch_size_callback(update_name, database_name) + + return self.MINIMUM_BACKGROUND_BATCH_SIZE + def get_current_update(self) -> Optional[BackgroundUpdatePerformance]: """Returns the current background update, if any.""" @@ -135,13 +263,8 @@ class BackgroundUpdater: try: logger.info("Starting background schema updates") while self.enabled: - if sleep: - await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0) - try: - result = await self.do_next_background_update( - self.BACKGROUND_UPDATE_DURATION_MS - ) + result = await self.do_next_background_update(sleep) except Exception: logger.exception("Error doing update") else: @@ -203,13 +326,15 @@ class BackgroundUpdater: return not update_exists - async def do_next_background_update(self, desired_duration_ms: float) -> bool: + async def do_next_background_update(self, sleep: bool = True) -> bool: """Does some amount of work on the next queued background update Returns once some amount of work is done. Args: - desired_duration_ms: How long we want to spend updating. + sleep: Whether to limit how quickly we run background updates or + not. + Returns: True if we have finished running all the background updates, otherwise False """ @@ -252,7 +377,19 @@ class BackgroundUpdater: self._current_background_update = upd["update_name"] - await self._do_background_update(desired_duration_ms) + # We have a background update to run, otherwise we would have returned + # early. + assert self._current_background_update is not None + update_info = self._background_update_handlers[self._current_background_update] + + async with self._get_context_manager_for_update( + sleep=sleep, + update_name=self._current_background_update, + database_name=self._database_name, + oneshot=update_info.oneshot, + ) as desired_duration_ms: + await self._do_background_update(desired_duration_ms) + return False async def _do_background_update(self, desired_duration_ms: float) -> int: @@ -260,7 +397,7 @@ class BackgroundUpdater: update_name = self._current_background_update logger.info("Starting update batch on background update '%s'", update_name) - update_handler = self._background_update_handlers[update_name] + update_handler = self._background_update_handlers[update_name].callback performance = self._background_update_performance.get(update_name) @@ -273,9 +410,14 @@ class BackgroundUpdater: if items_per_ms is not None: batch_size = int(desired_duration_ms * items_per_ms) # Clamp the batch size so that we always make progress - batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) + batch_size = max( + batch_size, + await self._min_batch_size(update_name, self._database_name), + ) else: - batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE + batch_size = await self._default_batch_size( + update_name, self._database_name + ) progress_json = await self.db_pool.simple_select_one_onecol( "background_updates", @@ -294,6 +436,8 @@ class BackgroundUpdater: duration_ms = time_stop - time_start + performance.update(items_updated, duration_ms) + logger.info( "Running background update %r. Processed %r items in %rms." " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", @@ -306,8 +450,6 @@ class BackgroundUpdater: batch_size, ) - performance.update(items_updated, duration_ms) - return len(self._background_update_performance) def register_background_update_handler( @@ -331,7 +473,9 @@ class BackgroundUpdater: update_name: The name of the update that this code handles. update_handler: The function that does the update. """ - self._background_update_handlers[update_name] = update_handler + self._background_update_handlers[update_name] = _BackgroundUpdateHandler( + update_handler + ) def register_noop_background_update(self, update_name: str) -> None: """Register a noop handler for a background update. @@ -453,7 +597,9 @@ class BackgroundUpdater: await self._end_background_update(update_name) return 1 - self.register_background_update_handler(update_name, updater) + self._background_update_handlers[update_name] = _BackgroundUpdateHandler( + updater, oneshot=True + ) async def _end_background_update(self, update_name: str) -> None: """Removes a completed background update task from the queue. diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 90f800e564..f8cba7b645 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -128,6 +128,7 @@ class EmailPusherTests(HomeserverTestCase): ) self.auth_handler = hs.get_auth_handler() + self.store = hs.get_datastore() def test_need_validated_email(self): """Test that we can only add an email pusher if the user has validated @@ -408,13 +409,7 @@ class EmailPusherTests(HomeserverTestCase): self.hs.get_datastore().db_pool.updates._all_done = False # Now let's actually drive the updates to completion - while not self.get_success( - self.hs.get_datastore().db_pool.updates.has_completed_background_updates() - ): - self.get_success( - self.hs.get_datastore().db_pool.updates.do_next_background_update(100), - by=0.1, - ) + self.wait_for_background_updates() # Check that all pushers with unlinked addresses were deleted pushers = self.get_success( diff --git a/tests/rest/admin/test_background_updates.py b/tests/rest/admin/test_background_updates.py index cd5c60b65c..62f242baf6 100644 --- a/tests/rest/admin/test_background_updates.py +++ b/tests/rest/admin/test_background_updates.py @@ -135,7 +135,7 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): self._register_bg_update() self.store.db_pool.updates.start_doing_background_updates() - self.reactor.pump([1.0, 1.0]) + self.reactor.pump([1.0, 1.0, 1.0]) channel = self.make_request( "GET", diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index a5f5ebad41..216d816d56 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -1,8 +1,11 @@ -from unittest.mock import Mock +from mock import Mock + +from twisted.internet.defer import Deferred, ensureDeferred from synapse.storage.background_updates import BackgroundUpdater from tests import unittest +from tests.test_utils import make_awaitable class BackgroundUpdateTestCase(unittest.HomeserverTestCase): @@ -20,10 +23,10 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): def test_do_background_update(self): # the time we claim it takes to update one item when running the update - duration_ms = 4200 + duration_ms = 10 # the target runtime for each bg update - target_background_update_duration_ms = 5000000 + target_background_update_duration_ms = 100 store = self.hs.get_datastore() self.get_success( @@ -48,10 +51,8 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): self.update_handler.side_effect = update self.update_handler.reset_mock() res = self.get_success( - self.updates.do_next_background_update( - target_background_update_duration_ms - ), - by=0.1, + self.updates.do_next_background_update(False), + by=0.01, ) self.assertFalse(res) @@ -74,16 +75,93 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): self.update_handler.side_effect = update self.update_handler.reset_mock() - result = self.get_success( - self.updates.do_next_background_update(target_background_update_duration_ms) - ) + result = self.get_success(self.updates.do_next_background_update(False)) self.assertFalse(result) self.update_handler.assert_called_once() # third step: we don't expect to be called any more self.update_handler.reset_mock() - result = self.get_success( - self.updates.do_next_background_update(target_background_update_duration_ms) - ) + result = self.get_success(self.updates.do_next_background_update(False)) self.assertTrue(result) self.assertFalse(self.update_handler.called) + + +class BackgroundUpdateControllerTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, homeserver): + self.updates: BackgroundUpdater = self.hs.get_datastore().db_pool.updates + # the base test class should have run the real bg updates for us + self.assertTrue( + self.get_success(self.updates.has_completed_background_updates()) + ) + + self.update_deferred = Deferred() + self.update_handler = Mock(return_value=self.update_deferred) + self.updates.register_background_update_handler( + "test_update", self.update_handler + ) + + # Mock out the AsyncContextManager + self._update_ctx_manager = Mock(spec=["__aenter__", "__aexit__"]) + self._update_ctx_manager.__aenter__ = Mock( + return_value=make_awaitable(None), + ) + self._update_ctx_manager.__aexit__ = Mock(return_value=make_awaitable(None)) + + # Mock out the `update_handler` callback + self._on_update = Mock(return_value=self._update_ctx_manager) + + # Define a default batch size value that's not the same as the internal default + # value (100). + self._default_batch_size = 500 + + # Register the callbacks with more mocks + self.hs.get_module_api().register_background_update_controller_callbacks( + on_update=self._on_update, + min_batch_size=Mock(return_value=make_awaitable(self._default_batch_size)), + default_batch_size=Mock( + return_value=make_awaitable(self._default_batch_size), + ), + ) + + def test_controller(self): + store = self.hs.get_datastore() + self.get_success( + store.db_pool.simple_insert( + "background_updates", + values={"update_name": "test_update", "progress_json": "{}"}, + ) + ) + + # Set the return value for the context manager. + enter_defer = Deferred() + self._update_ctx_manager.__aenter__ = Mock(return_value=enter_defer) + + # Start the background update. + do_update_d = ensureDeferred(self.updates.do_next_background_update(True)) + + self.pump() + + # `run_update` should have been called, but the update handler won't be + # called until the `enter_defer` (returned by `__aenter__`) is resolved. + self._on_update.assert_called_once_with( + "test_update", + "master", + False, + ) + self.assertFalse(do_update_d.called) + self.assertFalse(self.update_deferred.called) + + # Resolving the `enter_defer` should call the update handler, which then + # blocks. + enter_defer.callback(100) + self.pump() + self.update_handler.assert_called_once_with({}, self._default_batch_size) + self.assertFalse(self.update_deferred.called) + self._update_ctx_manager.__aexit__.assert_not_called() + + # Resolving the update handler deferred should cause the + # `do_next_background_update` to finish and return + self.update_deferred.callback(100) + self.pump() + self._update_ctx_manager.__aexit__.assert_called() + self.get_success(do_update_d) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index b31c5eb5ec..7b7f6c349e 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -664,7 +664,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): ): iterations += 1 self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) # Ensure that we did actually take multiple iterations to process the @@ -723,7 +723,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): ): iterations += 1 self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) # Ensure that we did actually take multiple iterations to process the diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 37cf7bb232..7f5b28aed8 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -23,6 +23,7 @@ from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage import DataStore +from synapse.storage.background_updates import _BackgroundUpdateHandler from synapse.storage.roommember import ProfileInfo from synapse.util import Clock @@ -391,7 +392,9 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): with mock.patch.dict( self.store.db_pool.updates._background_update_handlers, - populate_user_directory_process_users=mocked_process_users, + populate_user_directory_process_users=_BackgroundUpdateHandler( + mocked_process_users, + ), ): self._purge_and_rebuild_user_dir() diff --git a/tests/unittest.py b/tests/unittest.py index 165aafc574..eea0903f05 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -331,17 +331,16 @@ class HomeserverTestCase(TestCase): time.sleep(0.01) def wait_for_background_updates(self) -> None: - """ - Block until all background database updates have completed. + """Block until all background database updates have completed. - Note that callers must ensure that's a store property created on the + Note that callers must ensure there's a store property created on the testcase. """ while not self.get_success( self.store.db_pool.updates.has_completed_background_updates() ): self.get_success( - self.store.db_pool.updates.do_next_background_update(100), by=0.1 + self.store.db_pool.updates.do_next_background_update(False), by=0.1 ) def make_homeserver(self, reactor, clock): @@ -500,8 +499,7 @@ class HomeserverTestCase(TestCase): async def run_bg_updates(): with LoggingContext("run_bg_updates"): - while not await stor.db_pool.updates.has_completed_background_updates(): - await stor.db_pool.updates.do_next_background_update(1) + self.get_success(stor.db_pool.updates.run_background_updates(False)) hs = setup_test_homeserver(self.addCleanup, *args, **kwargs) stor = hs.get_datastore() -- cgit 1.5.1 From a4521ce0a8d252e77ca8bd261ecf40ba67511a31 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 29 Nov 2021 14:32:20 -0500 Subject: Support the stable /hierarchy endpoint from MSC2946 (#11329) This also makes additional updates where the implementation had drifted from the approved MSC. Unstable endpoints will be removed at a later data. --- .github/workflows/tests.yml | 2 +- changelog.d/11329.feature | 1 + docs/workers.md | 4 +- scripts-dev/complement.sh | 2 +- synapse/app/homeserver.py | 1 + synapse/federation/federation_client.py | 31 ++++++-- synapse/federation/transport/client.py | 22 +++++- synapse/federation/transport/server/federation.py | 6 +- synapse/handlers/room_summary.py | 14 +++- synapse/rest/client/room.py | 8 +- tests/handlers/test_room_summary.py | 94 ++++++++++++++++------- 11 files changed, 134 insertions(+), 51 deletions(-) create mode 100644 changelog.d/11329.feature (limited to 'docs') diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3bee4ee9f3..21c9ee7823 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -374,7 +374,7 @@ jobs: working-directory: complement/dockerfiles # Run Complement - - run: go test -v -tags synapse_blacklist,msc2403,msc2946 ./tests/... + - run: go test -v -tags synapse_blacklist,msc2403 ./tests/... env: COMPLEMENT_BASE_IMAGE: complement-synapse:latest working-directory: complement diff --git a/changelog.d/11329.feature b/changelog.d/11329.feature new file mode 100644 index 0000000000..7e0efb3b00 --- /dev/null +++ b/changelog.d/11329.feature @@ -0,0 +1 @@ +Support the stable API endpoints for [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946): the room `/hierarchy` endpoint. diff --git a/docs/workers.md b/docs/workers.md index 17c8bfeef6..fd83e2ddeb 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -210,7 +210,7 @@ expressions: ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query ^/_matrix/federation/unstable/org.matrix.msc2946/spaces/ - ^/_matrix/federation/unstable/org.matrix.msc2946/hierarchy/ + ^/_matrix/federation/(v1|unstable/org.matrix.msc2946)/hierarchy/ # Inbound federation transaction request ^/_matrix/federation/v1/send/ @@ -223,7 +223,7 @@ expressions: ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ ^/_matrix/client/unstable/org.matrix.msc2946/rooms/.*/spaces$ - ^/_matrix/client/unstable/org.matrix.msc2946/rooms/.*/hierarchy$ + ^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$ ^/_matrix/client/(api/v1|r0|v3|unstable)/devices$ diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 972244f4c9..53295b58fc 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... +go test -v -tags synapse_blacklist,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7e09530ad2..52541faab2 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -194,6 +194,7 @@ class SynapseHomeServer(HomeServer): { "/_matrix/client/api/v1": client_resource, "/_matrix/client/r0": client_resource, + "/_matrix/client/v1": client_resource, "/_matrix/client/v3": client_resource, "/_matrix/client/unstable": client_resource, "/_matrix/client/v2_alpha": client_resource, diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 3b85b135e0..bc3f96c1fc 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -1395,11 +1395,28 @@ class FederationClient(FederationBase): async def send_request( destination: str, ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]: - res = await self.transport_layer.get_room_hierarchy( - destination=destination, - room_id=room_id, - suggested_only=suggested_only, - ) + try: + res = await self.transport_layer.get_room_hierarchy( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) + except HttpResponseException as e: + # If an error is received that is due to an unrecognised endpoint, + # fallback to the unstable endpoint. Otherwise consider it a + # legitmate error and raise. + if not self._is_unknown_endpoint(e): + raise + + logger.debug( + "Couldn't fetch room hierarchy with the v1 API, falling back to the unstable API" + ) + + res = await self.transport_layer.get_room_hierarchy_unstable( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + ) room = res.get("room") if not isinstance(room, dict): @@ -1449,6 +1466,10 @@ class FederationClient(FederationBase): if e.code != 502: raise + logger.debug( + "Couldn't fetch room hierarchy, falling back to the spaces API" + ) + # Fallback to the old federation API and translate the results if # no servers implement the new API. # diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 0fea221165..fe29bcfd4b 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -1192,10 +1192,24 @@ class TransportLayerClient: ) async def get_room_hierarchy( - self, - destination: str, - room_id: str, - suggested_only: bool, + self, destination: str, room_id: str, suggested_only: bool + ) -> JsonDict: + """ + Args: + destination: The remote server + room_id: The room ID to ask about. + suggested_only: if True, only suggested rooms will be returned + """ + path = _create_v1_path("/hierarchy/%s", room_id) + + return await self.client.get_json( + destination=destination, + path=path, + args={"suggested_only": "true" if suggested_only else "false"}, + ) + + async def get_room_hierarchy_unstable( + self, destination: str, room_id: str, suggested_only: bool ) -> JsonDict: """ Args: diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 2fdf6cc99e..66e915228c 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -611,7 +611,6 @@ class FederationSpaceSummaryServlet(BaseFederationServlet): class FederationRoomHierarchyServlet(BaseFederationServlet): - PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" PATH = "/hierarchy/(?P[^/]*)" def __init__( @@ -637,6 +636,10 @@ class FederationRoomHierarchyServlet(BaseFederationServlet): ) +class FederationRoomHierarchyUnstableServlet(FederationRoomHierarchyServlet): + PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" + + class RoomComplexityServlet(BaseFederationServlet): """ Indicates to other servers how complex (and therefore likely @@ -701,6 +704,7 @@ FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( RoomComplexityServlet, FederationSpaceSummaryServlet, FederationRoomHierarchyServlet, + FederationRoomHierarchyUnstableServlet, FederationV1SendKnockServlet, FederationMakeKnockServlet, ) diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 8181cc0b52..b2cfe537df 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -36,8 +36,9 @@ from synapse.api.errors import ( SynapseError, UnsupportedRoomVersionError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.events import EventBase -from synapse.types import JsonDict +from synapse.types import JsonDict, Requester from synapse.util.caches.response_cache import ResponseCache if TYPE_CHECKING: @@ -93,6 +94,9 @@ class RoomSummaryHandler: self._event_serializer = hs.get_event_client_serializer() self._server_name = hs.hostname self._federation_client = hs.get_federation_client() + self._ratelimiter = Ratelimiter( + store=self._store, clock=hs.get_clock(), rate_hz=5, burst_count=10 + ) # If a user tries to fetch the same page multiple times in quick succession, # only process the first attempt and return its result to subsequent requests. @@ -249,7 +253,7 @@ class RoomSummaryHandler: async def get_room_hierarchy( self, - requester: str, + requester: Requester, requested_room_id: str, suggested_only: bool = False, max_depth: Optional[int] = None, @@ -276,6 +280,8 @@ class RoomSummaryHandler: Returns: The JSON hierarchy dictionary. """ + await self._ratelimiter.ratelimit(requester) + # If a user tries to fetch the same page multiple times in quick succession, # only process the first attempt and return its result to subsequent requests. # @@ -283,7 +289,7 @@ class RoomSummaryHandler: # to process multiple requests for the same page will result in errors. return await self._pagination_response_cache.wrap( ( - requester, + requester.user.to_string(), requested_room_id, suggested_only, max_depth, @@ -291,7 +297,7 @@ class RoomSummaryHandler: from_token, ), self._get_room_hierarchy, - requester, + requester.user.to_string(), requested_room_id, suggested_only, max_depth, diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 955d4e8641..73d0f7c950 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1138,12 +1138,12 @@ class RoomSpaceSummaryRestServlet(RestServlet): class RoomHierarchyRestServlet(RestServlet): - PATTERNS = ( + PATTERNS = [ re.compile( - "^/_matrix/client/unstable/org.matrix.msc2946" + "^/_matrix/client/(v1|unstable/org.matrix.msc2946)" "/rooms/(?P[^/]*)/hierarchy$" ), - ) + ] def __init__(self, hs: "HomeServer"): super().__init__() @@ -1168,7 +1168,7 @@ class RoomHierarchyRestServlet(RestServlet): ) return 200, await self._room_summary_handler.get_room_hierarchy( - requester.user.to_string(), + requester, room_id, suggested_only=parse_boolean(request, "suggested_only", default=False), max_depth=max_depth, diff --git a/tests/handlers/test_room_summary.py b/tests/handlers/test_room_summary.py index 7b95844b55..e5a6a6c747 100644 --- a/tests/handlers/test_room_summary.py +++ b/tests/handlers/test_room_summary.py @@ -32,7 +32,7 @@ from synapse.handlers.room_summary import _child_events_comparison_key, _RoomEnt from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, UserID, create_requester from tests import unittest @@ -249,7 +249,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -263,7 +263,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): expected = [(self.space, [self.room]), (self.room, ())] self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # If the space is made invite-only, it should no longer be viewable. @@ -274,7 +276,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): tok=self.token, ) self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError) - self.get_failure(self.handler.get_room_hierarchy(user2, self.space), AuthError) + self.get_failure( + self.handler.get_room_hierarchy(create_requester(user2), self.space), + AuthError, + ) # If the space is made world-readable it should return a result. self.helper.send_state( @@ -286,7 +291,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): result = self.get_success(self.handler.get_space_summary(user2, self.space)) self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # Make it not world-readable again and confirm it results in an error. @@ -297,7 +304,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): tok=self.token, ) self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError) - self.get_failure(self.handler.get_room_hierarchy(user2, self.space), AuthError) + self.get_failure( + self.handler.get_room_hierarchy(create_requester(user2), self.space), + AuthError, + ) # Join the space and results should be returned. self.helper.invite(self.space, targ=user2, tok=self.token) @@ -305,7 +315,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): result = self.get_success(self.handler.get_space_summary(user2, self.space)) self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) # Attempting to view an unknown room returns the same error. @@ -314,7 +326,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): AuthError, ) self.get_failure( - self.handler.get_room_hierarchy(user2, "#not-a-space:" + self.hs.hostname), + self.handler.get_room_hierarchy( + create_requester(user2), "#not-a-space:" + self.hs.hostname + ), AuthError, ) @@ -322,10 +336,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): """In-flight room hierarchy requests are deduplicated.""" # Run two `get_room_hierarchy` calls up until they block. deferred1 = ensureDeferred( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) deferred2 = ensureDeferred( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) # Complete the two calls. @@ -340,7 +354,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # A subsequent `get_room_hierarchy` call should not reuse the result. result3 = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result3, expected) self.assertIsNot(result1, result3) @@ -359,9 +373,11 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # Run two `get_room_hierarchy` calls for different users up until they block. deferred1 = ensureDeferred( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) + ) + deferred2 = ensureDeferred( + self.handler.get_room_hierarchy(create_requester(user2), self.space) ) - deferred2 = ensureDeferred(self.handler.get_room_hierarchy(user2, self.space)) # Complete the two calls. result1 = self.get_success(deferred1) @@ -465,7 +481,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): ] self._assert_rooms(result, expected) - result = self.get_success(self.handler.get_room_hierarchy(user2, self.space)) + result = self.get_success( + self.handler.get_room_hierarchy(create_requester(user2), self.space) + ) self._assert_hierarchy(result, expected) def test_complex_space(self): @@ -507,7 +525,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -522,7 +540,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): room_ids.append(self.room) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, limit=7) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, limit=7 + ) ) # The result should have the space and all of the links, plus some of the # rooms and a pagination token. @@ -534,7 +554,10 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # Check the next page. result = self.get_success( self.handler.get_room_hierarchy( - self.user, self.space, limit=5, from_token=result["next_batch"] + create_requester(self.user), + self.space, + limit=5, + from_token=result["next_batch"], ) ) # The result should have the space and the room in it, along with a link @@ -554,20 +577,22 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): room_ids.append(self.room) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, limit=7) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, limit=7 + ) ) self.assertIn("next_batch", result) # Changing the room ID, suggested-only, or max-depth causes an error. self.get_failure( self.handler.get_room_hierarchy( - self.user, self.room, from_token=result["next_batch"] + create_requester(self.user), self.room, from_token=result["next_batch"] ), SynapseError, ) self.get_failure( self.handler.get_room_hierarchy( - self.user, + create_requester(self.user), self.space, suggested_only=True, from_token=result["next_batch"], @@ -576,14 +601,19 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): ) self.get_failure( self.handler.get_room_hierarchy( - self.user, self.space, max_depth=0, from_token=result["next_batch"] + create_requester(self.user), + self.space, + max_depth=0, + from_token=result["next_batch"], ), SynapseError, ) # An invalid token is ignored. self.get_failure( - self.handler.get_room_hierarchy(self.user, self.space, from_token="foo"), + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, from_token="foo" + ), SynapseError, ) @@ -609,14 +639,18 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # Test just the space itself. result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=0) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=0 + ) ) expected: List[Tuple[str, Iterable[str]]] = [(spaces[0], [rooms[0], spaces[1]])] self._assert_hierarchy(result, expected) # A single additional layer. result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=1) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=1 + ) ) expected += [ (rooms[0], ()), @@ -626,7 +660,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): # A few layers. result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space, max_depth=3) + self.handler.get_room_hierarchy( + create_requester(self.user), self.space, max_depth=3 + ) ) expected += [ (rooms[1], ()), @@ -657,7 +693,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): self._assert_rooms(result, expected) result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -739,7 +775,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -906,7 +942,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) @@ -964,7 +1000,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase): new=summarize_remote_room_hierarchy, ): result = self.get_success( - self.handler.get_room_hierarchy(self.user, self.space) + self.handler.get_room_hierarchy(create_requester(self.user), self.space) ) self._assert_hierarchy(result, expected) -- cgit 1.5.1 From a9481223d1d5a8b3bf0d7ce2140dd3c919481f4f Mon Sep 17 00:00:00 2001 From: Marcus Date: Tue, 30 Nov 2021 12:49:20 +0100 Subject: Improved push typing (#11409) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/11409.misc | 1 + docs/templates.md | 5 + synapse/push/emailpusher.py | 10 +- synapse/push/httppusher.py | 3 +- synapse/push/mailer.py | 72 ++++++----- synapse/push/push_types.py | 136 +++++++++++++++++++++ .../storage/databases/main/event_push_actions.py | 19 ++- 7 files changed, 210 insertions(+), 36 deletions(-) create mode 100644 changelog.d/11409.misc create mode 100644 synapse/push/push_types.py (limited to 'docs') diff --git a/changelog.d/11409.misc b/changelog.d/11409.misc new file mode 100644 index 0000000000..f9e8ae9e3a --- /dev/null +++ b/changelog.d/11409.misc @@ -0,0 +1 @@ +Improve internal types in push code. diff --git a/docs/templates.md b/docs/templates.md index a240f58b54..2b66e9d862 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -71,7 +71,12 @@ Below are the templates Synapse will look for when generating the content of an * `sender_avatar_url`: the avatar URL (as a `mxc://` URL) for the event's sender * `sender_hash`: a hash of the user ID of the sender + * `msgtype`: the type of the message + * `body_text_html`: html representation of the message + * `body_text_plain`: plaintext representation of the message + * `image_url`: mxc url of an image, when "msgtype" is "m.image" * `link`: a `matrix.to` link to the room + * `avator_url`: url to the room's avator * `reason`: information on the event that triggered the email to be sent. It's an object with the following attributes: * `room_id`: the ID of the room the event was sent in diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index cf5abdfbda..4f13c0418a 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -21,6 +21,8 @@ from twisted.internet.interfaces import IDelayedCall from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException, ThrottleParams from synapse.push.mailer import Mailer +from synapse.push.push_types import EmailReason +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.util.threepids import validate_email if TYPE_CHECKING: @@ -190,7 +192,7 @@ class EmailPusher(Pusher): # we then consider all previously outstanding notifications # to be delivered. - reason = { + reason: EmailReason = { "room_id": push_action["room_id"], "now": self.clock.time_msec(), "received_at": received_at, @@ -275,7 +277,7 @@ class EmailPusher(Pusher): return may_send_at async def sent_notif_update_throttle( - self, room_id: str, notified_push_action: dict + self, room_id: str, notified_push_action: EmailPushAction ) -> None: # We have sent a notification, so update the throttle accordingly. # If the event that triggered the notif happened more than @@ -315,7 +317,9 @@ class EmailPusher(Pusher): self.pusher_id, room_id, self.throttle_params[room_id] ) - async def send_notification(self, push_actions: List[dict], reason: dict) -> None: + async def send_notification( + self, push_actions: List[EmailPushAction], reason: EmailReason + ) -> None: logger.info("Sending notif email for user %r", self.user_id) await self.mailer.send_notification_mail( diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index dbf4ad7f97..3fa603ccb7 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -26,6 +26,7 @@ from synapse.events import EventBase from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException +from synapse.storage.databases.main.event_push_actions import HttpPushAction from . import push_rule_evaluator, push_tools @@ -273,7 +274,7 @@ class HttpPusher(Pusher): ) break - async def _process_one(self, push_action: dict) -> bool: + async def _process_one(self, push_action: HttpPushAction) -> bool: if "notify" not in push_action["actions"]: return True diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index ce299ba3da..ba4f866487 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -14,7 +14,7 @@ import logging import urllib.parse -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, TypeVar import bleach import jinja2 @@ -28,6 +28,14 @@ from synapse.push.presentable_names import ( descriptor_from_member_events, name_from_member_event, ) +from synapse.push.push_types import ( + EmailReason, + MessageVars, + NotifVars, + RoomVars, + TemplateVars, +) +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.storage.state import StateFilter from synapse.types import StateMap, UserID from synapse.util.async_helpers import concurrently_execute @@ -135,7 +143,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -165,7 +173,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -196,7 +204,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -210,8 +218,8 @@ class Mailer: app_id: str, user_id: str, email_address: str, - push_actions: Iterable[Dict[str, Any]], - reason: Dict[str, Any], + push_actions: Iterable[EmailPushAction], + reason: EmailReason, ) -> None: """ Send email regarding a user's room notifications @@ -230,7 +238,7 @@ class Mailer: [pa["event_id"] for pa in push_actions] ) - notifs_by_room: Dict[str, List[Dict[str, Any]]] = {} + notifs_by_room: Dict[str, List[EmailPushAction]] = {} for pa in push_actions: notifs_by_room.setdefault(pa["room_id"], []).append(pa) @@ -258,7 +266,7 @@ class Mailer: # actually sort our so-called rooms_in_order list, most recent room first rooms_in_order.sort(key=lambda r: -(notifs_by_room[r][-1]["received_ts"] or 0)) - rooms: List[Dict[str, Any]] = [] + rooms: List[RoomVars] = [] for r in rooms_in_order: roomvars = await self._get_room_vars( @@ -289,7 +297,7 @@ class Mailer: notifs_by_room, state_by_room, notif_events, reason ) - template_vars = { + template_vars: TemplateVars = { "user_display_name": user_display_name, "unsubscribe_link": self._make_unsubscribe_link( user_id, app_id, email_address @@ -302,10 +310,10 @@ class Mailer: await self.send_email(email_address, summary_text, template_vars) async def send_email( - self, email_address: str, subject: str, extra_template_vars: Dict[str, Any] + self, email_address: str, subject: str, extra_template_vars: TemplateVars ) -> None: """Send an email with the given information and template text""" - template_vars = { + template_vars: TemplateVars = { "app_name": self.app_name, "server_name": self.hs.config.server.server_name, } @@ -327,10 +335,10 @@ class Mailer: self, room_id: str, user_id: str, - notifs: Iterable[Dict[str, Any]], + notifs: Iterable[EmailPushAction], notif_events: Dict[str, EventBase], room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> RoomVars: """ Generate the variables for notifications on a per-room basis. @@ -356,7 +364,7 @@ class Mailer: room_name = await calculate_room_name(self.store, room_state_ids, user_id) - room_vars: Dict[str, Any] = { + room_vars: RoomVars = { "title": room_name, "hash": string_ordinal_total(room_id), # See sender avatar hash "notifs": [], @@ -417,11 +425,11 @@ class Mailer: async def _get_notif_vars( self, - notif: Dict[str, Any], + notif: EmailPushAction, user_id: str, notif_event: EventBase, room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> NotifVars: """ Generate the variables for a single notification. @@ -442,7 +450,7 @@ class Mailer: after_limit=CONTEXT_AFTER, ) - ret = { + ret: NotifVars = { "link": self._make_notif_link(notif), "ts": notif["received_ts"], "messages": [], @@ -461,8 +469,8 @@ class Mailer: return ret async def _get_message_vars( - self, notif: Dict[str, Any], event: EventBase, room_state_ids: StateMap[str] - ) -> Optional[Dict[str, Any]]: + self, notif: EmailPushAction, event: EventBase, room_state_ids: StateMap[str] + ) -> Optional[MessageVars]: """ Generate the variables for a single event, if possible. @@ -494,7 +502,9 @@ class Mailer: if sender_state_event: sender_name = name_from_member_event(sender_state_event) - sender_avatar_url = sender_state_event.content.get("avatar_url") + sender_avatar_url: Optional[str] = sender_state_event.content.get( + "avatar_url" + ) else: # No state could be found, fallback to the MXID. sender_name = event.sender @@ -504,7 +514,7 @@ class Mailer: # sender_hash % the number of default images to choose from sender_hash = string_ordinal_total(event.sender) - ret = { + ret: MessageVars = { "event_type": event.type, "is_historical": event.event_id != notif["event_id"], "id": event.event_id, @@ -519,6 +529,8 @@ class Mailer: return ret msgtype = event.content.get("msgtype") + if not isinstance(msgtype, str): + msgtype = None ret["msgtype"] = msgtype @@ -533,7 +545,7 @@ class Mailer: return ret def _add_text_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add a sanitised message body to the message variables. @@ -543,8 +555,8 @@ class Mailer: event: The event under consideration. """ msgformat = event.content.get("format") - - messagevars["format"] = msgformat + if not isinstance(msgformat, str): + msgformat = None formatted_body = event.content.get("formatted_body") body = event.content.get("body") @@ -555,7 +567,7 @@ class Mailer: messagevars["body_text_html"] = safe_text(body) def _add_image_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add an image URL to the message variables. @@ -570,7 +582,7 @@ class Mailer: async def _make_summary_text_single_room( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], user_id: str, @@ -685,10 +697,10 @@ class Mailer: async def _make_summary_text( self, - notifs_by_room: Dict[str, List[Dict[str, Any]]], + notifs_by_room: Dict[str, List[EmailPushAction]], room_state_ids: Dict[str, StateMap[str]], notif_events: Dict[str, EventBase], - reason: Dict[str, Any], + reason: EmailReason, ) -> str: """ Make a summary text for the email when multiple rooms have notifications. @@ -718,7 +730,7 @@ class Mailer: async def _make_summary_text_from_member_events( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], ) -> str: @@ -805,7 +817,7 @@ class Mailer: base_url = "https://matrix.to/#" return "%s/%s" % (base_url, room_id) - def _make_notif_link(self, notif: Dict[str, str]) -> str: + def _make_notif_link(self, notif: EmailPushAction) -> str: """ Generate a link to open an event in the web client. diff --git a/synapse/push/push_types.py b/synapse/push/push_types.py new file mode 100644 index 0000000000..8d16ab62ce --- /dev/null +++ b/synapse/push/push_types.py @@ -0,0 +1,136 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional + +from typing_extensions import TypedDict + + +class EmailReason(TypedDict, total=False): + """ + Information on the event that triggered the email to be sent + + room_id: the ID of the room the event was sent in + now: timestamp in ms when the email is being sent out + room_name: a human-readable name for the room the event was sent in + received_at: the time in milliseconds at which the event was received + delay_before_mail_ms: the amount of time in milliseconds Synapse always waits + before ever emailing about a notification (to give the user a chance to respond + to other push or notice the window) + last_sent_ts: the time in milliseconds at which a notification was last sent + for an event in this room + throttle_ms: the minimum amount of time in milliseconds between two + notifications can be sent for this room + """ + + room_id: str + now: int + room_name: Optional[str] + received_at: int + delay_before_mail_ms: int + last_sent_ts: int + throttle_ms: int + + +class MessageVars(TypedDict, total=False): + """ + Details about a specific message to include in a notification + + event_type: the type of the event + is_historical: a boolean, which is `False` if the message is the one + that triggered the notification, `True` otherwise + id: the ID of the event + ts: the time in milliseconds at which the event was sent + sender_name: the display name for the event's sender + sender_avatar_url: the avatar URL (as a `mxc://` URL) for the event's + sender + sender_hash: a hash of the user ID of the sender + msgtype: the type of the message + body_text_html: html representation of the message + body_text_plain: plaintext representation of the message + image_url: mxc url of an image, when "msgtype" is "m.image" + """ + + event_type: str + is_historical: bool + id: str + ts: int + sender_name: str + sender_avatar_url: Optional[str] + sender_hash: int + msgtype: Optional[str] + body_text_html: str + body_text_plain: str + image_url: str + + +class NotifVars(TypedDict): + """ + Details about an event we are about to include in a notification + + link: a `matrix.to` link to the event + ts: the time in milliseconds at which the event was received + messages: a list of messages containing one message before the event, the + message in the event, and one message after the event. + """ + + link: str + ts: Optional[int] + messages: List[MessageVars] + + +class RoomVars(TypedDict): + """ + Represents a room containing events to include in the email. + + title: a human-readable name for the room + hash: a hash of the ID of the room + invite: a boolean, which is `True` if the room is an invite the user hasn't + accepted yet, `False` otherwise + notifs: a list of events, or an empty list if `invite` is `True`. + link: a `matrix.to` link to the room + avator_url: url to the room's avator + """ + + title: Optional[str] + hash: int + invite: bool + notifs: List[NotifVars] + link: str + avatar_url: Optional[str] + + +class TemplateVars(TypedDict, total=False): + """ + Generic structure for passing to the email sender, can hold all the fields used in email templates. + + app_name: name of the app/service this homeserver is associated with + server_name: name of our own homeserver + link: a link to include into the email to be sent + user_display_name: the display name for the user receiving the notification + unsubscribe_link: the link users can click to unsubscribe from email notifications + summary_text: a summary of the notification(s). The text used can be customised + by configuring the various settings in the `email.subjects` section of the + configuration file. + rooms: a list of rooms containing events to include in the email + reason: information on the event that triggered the email to be sent + """ + + app_name: str + server_name: str + link: str + user_display_name: str + unsubscribe_link: str + summary_text: str + rooms: List[RoomVars] + reason: EmailReason diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index d957e770dc..3efdd0c920 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -16,6 +16,7 @@ import logging from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import attr +from typing_extensions import TypedDict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json @@ -37,6 +38,20 @@ DEFAULT_HIGHLIGHT_ACTION = [ ] +class BasePushAction(TypedDict): + event_id: str + actions: List[Union[dict, str]] + + +class HttpPushAction(BasePushAction): + room_id: str + stream_ordering: int + + +class EmailPushAction(HttpPushAction): + received_ts: Optional[int] + + def _serialize_action(actions, is_highlight): """Custom serializer for actions. This allows us to "compress" common actions. @@ -221,7 +236,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): min_stream_ordering: int, max_stream_ordering: int, limit: int = 20, - ) -> List[dict]: + ) -> List[HttpPushAction]: """Get a list of the most recent unread push actions for a given user, within the given stream ordering range. Called by the httppusher. @@ -326,7 +341,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): min_stream_ordering: int, max_stream_ordering: int, limit: int = 20, - ) -> List[dict]: + ) -> List[EmailPushAction]: """Get a list of the most recent unread push actions for a given user, within the given stream ordering range. Called by the emailpusher -- cgit 1.5.1 From f1795463bf503a6fca909d77f598f641f9349f56 Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 30 Nov 2021 19:05:20 -0800 Subject: Add a note about huge pages to our Postgres doc (#11467) * Add note to postgres doc about hugepages * Newsfragment --- changelog.d/11467.misc | 1 + docs/postgres.md | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/11467.misc (limited to 'docs') diff --git a/changelog.d/11467.misc b/changelog.d/11467.misc new file mode 100644 index 0000000000..84e8a5ab4e --- /dev/null +++ b/changelog.d/11467.misc @@ -0,0 +1 @@ +Add a note about postgres memory management and hugepages to postgres doc. \ No newline at end of file diff --git a/docs/postgres.md b/docs/postgres.md index 083b0aaff0..e4861c1f12 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -118,6 +118,9 @@ performance: Note that the appropriate values for those fields depend on the amount of free memory the database host has available. +Additionally, admins of large deployments might want to consider using huge pages +to help manage memory, especially when using large values of `shared_buffers`. You +can read more about that [here](https://www.postgresql.org/docs/10/kernel-resources.html#LINUX-HUGE-PAGES). ## Porting from SQLite -- cgit 1.5.1 From b0eb64ff7bf6bde42046e091f8bdea9b7aab5f04 Mon Sep 17 00:00:00 2001 From: Etienne Dysli Metref Date: Wed, 1 Dec 2021 10:40:51 +0100 Subject: Remove mention of OIDC certification from Dex (#11470) Dex isn't yet [1,2] a certified OpenID Provider implementation. As of today, it's not on the list maintained by the OpenID Foundation. [3] [1] https://github.com/dexidp/dex/issues/42 [2] https://github.com/dexidp/dex/issues/262 [3] https://openid.net/certification/ --- docs/openid.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/openid.md b/docs/openid.md index c74e8bda60..f366709c69 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -83,7 +83,7 @@ oidc_providers: ### Dex -[Dex][dex-idp] is a simple, open-source, certified OpenID Connect Provider. +[Dex][dex-idp] is a simple, open-source OpenID Connect Provider. Although it is designed to help building a full-blown provider with an external database, it can be configured with static passwords in a config file. -- cgit 1.5.1 From b9fef1a7cdfcc128fa589a32160e6aa7ed8964d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 1 Dec 2021 10:57:55 +0000 Subject: Update openid.md fix header level --- docs/openid.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/openid.md b/docs/openid.md index f366709c69..ff9de9d5b8 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -523,7 +523,7 @@ The synapse config will look like this: email_template: "{{ user.email }}" ``` -## Django OAuth Toolkit +### Django OAuth Toolkit [django-oauth-toolkit](https://github.com/jazzband/django-oauth-toolkit) is a Django application providing out of the box all the endpoints, data and logic -- cgit 1.5.1 From 153194c7717d8016b0eb974c81b1baee7dc1917d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 1 Dec 2021 15:13:01 +0100 Subject: Link background update controller docs to summary (#11475) --- changelog.d/11475.feature | 1 + docs/SUMMARY.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/11475.feature (limited to 'docs') diff --git a/changelog.d/11475.feature b/changelog.d/11475.feature new file mode 100644 index 0000000000..aba3292015 --- /dev/null +++ b/changelog.d/11475.feature @@ -0,0 +1 @@ +Add plugin support for controlling database background updates. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cdedf8bccc..900254ff64 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -44,6 +44,7 @@ - [Presence router callbacks](modules/presence_router_callbacks.md) - [Account validity callbacks](modules/account_validity_callbacks.md) - [Password auth provider callbacks](modules/password_auth_provider_callbacks.md) + - [Background update controller callbacks](modules/background_update_controller_callbacks.md) - [Porting a legacy module to the new interface](modules/porting_legacy_module.md) - [Workers](workers.md) - [Using `synctl` with Workers](synctl_workers.md) -- cgit 1.5.1 From 84dc50e160a2ec6590813374b5a1e58b97f7a18d Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 1 Dec 2021 10:12:19 -0800 Subject: Port wiki pages to documentation website (#11402) * move wiki pages to synapse/docs and add a few titles where necessary * update SUMMARY.md with added pages * add changelog * move incorrectly located newsfragment * update changelog number * snake case added files and update summary.md accordingly * update issue/pr links * update relative links to docs * update changelog to indicate that we moved wiki pages to the docs and state reasoning * revert unintentional change to CHANGES.md * add link Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> * Update CHANGES.md Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- CHANGES.md | 17 +++ changelog.d/11402.misc | 1 + docs/SUMMARY.md | 6 + .../running_synapse_on_single_board_computers.md | 74 ++++++++++ docs/usage/administration/admin_faq.md | 96 +++++++++++++ .../administration/database_maintenance_tools.md | 36 +++++ docs/usage/administration/state_groups.md | 25 ++++ ...understanding_synapse_through_grafana_graphs.md | 72 ++++++++++ docs/usage/administration/useful_sql_for_admins.md | 156 +++++++++++++++++++++ 9 files changed, 483 insertions(+) create mode 100644 changelog.d/11402.misc create mode 100644 docs/other/running_synapse_on_single_board_computers.md create mode 100644 docs/usage/administration/admin_faq.md create mode 100644 docs/usage/administration/database_maintenance_tools.md create mode 100644 docs/usage/administration/state_groups.md create mode 100644 docs/usage/administration/understanding_synapse_through_grafana_graphs.md create mode 100644 docs/usage/administration/useful_sql_for_admins.md (limited to 'docs') diff --git a/CHANGES.md b/CHANGES.md index c283e33876..2e3f0da961 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,19 @@ +UNRELEASED +========== + +We've decided to move the existing, somewhat stagnant pages from the GitHub wiki +to the [documentation website](https://matrix-org.github.io/synapse/latest/). +This was done for two reasons. The first was to ensure that changes are checked by +multiple authors before being committed (everyone makes mistakes!) and the second +was visibility of the documentation. Not everyone knows that Synapse has some very +useful information hidden away in its GitHub wiki pages. Bringing them to the +documentation website should help with visibility, as well as keep all Synapse documentation +in one, easily-searchable location. + +Note that contributions to the documentation website happen through [GitHub pull +requests](https://github.com/matrix-org/synapse/pulls). Please visit [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) +if you need help with the process! + Synapse 1.48.0 (2021-11-30) =========================== @@ -86,6 +102,7 @@ Internal Changes - Refactor including the bundled relations when serializing an event. ([\#11408](https://github.com/matrix-org/synapse/issues/11408)) + Synapse 1.47.1 (2021-11-23) =========================== diff --git a/changelog.d/11402.misc b/changelog.d/11402.misc new file mode 100644 index 0000000000..c956338744 --- /dev/null +++ b/changelog.d/11402.misc @@ -0,0 +1 @@ +Add wiki pages to documentation website. \ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 900254ff64..41c8f0fbc9 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -67,7 +67,12 @@ - [Server Version](admin_api/version_api.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) + - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) + - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) + - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) + - [State Groups](usage/administration/state_groups.md) - [Request log format](usage/administration/request_log.md) + - [Admin FAQ](usage/administration/admin_faq.md) - [Scripts]() # Development @@ -95,3 +100,4 @@ # Other - [Dependency Deprecation Policy](deprecation_policy.md) + - [Running Synapse on a Single-Board Computer](other/running_synapse_on_single_board_computers.md) diff --git a/docs/other/running_synapse_on_single_board_computers.md b/docs/other/running_synapse_on_single_board_computers.md new file mode 100644 index 0000000000..ea14afa8b2 --- /dev/null +++ b/docs/other/running_synapse_on_single_board_computers.md @@ -0,0 +1,74 @@ +## Summary of performance impact of running on resource constrained devices such as SBCs + +I've been running my homeserver on a cubietruck at home now for some time and am often replying to statements like "you need loads of ram to join large rooms" with "it works fine for me". I thought it might be useful to curate a summary of the issues you're likely to run into to help as a scaling-down guide, maybe highlight these for development work or end up as documentation. It seems that once you get up to about 4x1.5GHz arm64 4GiB these issues are no longer a problem. + +- **Platform**: 2x1GHz armhf 2GiB ram [Single-board computers](https://wiki.debian.org/CheapServerBoxHardware), SSD, postgres. + +### Presence + +This is the main reason people have a poor matrix experience on resource constrained homeservers. Element web will frequently be saying the server is offline while the python process will be pegged at 100% cpu. This feature is used to tell when other users are active (have a client app in the foreground) and therefore more likely to respond, but requires a lot of network activity to maintain even when nobody is talking in a room. + +![Screenshot_2020-10-01_19-29-46](https://user-images.githubusercontent.com/71895/94848963-a47a3580-041c-11eb-8b6e-acb772b4259e.png) + +While synapse does have some performance issues with presence [#3971](https://github.com/matrix-org/synapse/issues/3971), the fundamental problem is that this is an easy feature to implement for a centralised service at nearly no overhead, but federation makes it combinatorial [#8055](https://github.com/matrix-org/synapse/issues/8055). There is also a client-side config option which disables the UI and idle tracking [enable_presence_by_hs_url] to blacklist the largest instances but I didn't notice much difference, so I recommend disabling the feature entirely at the server level as well. + +[enable_presence_by_hs_url]: https://github.com/vector-im/element-web/blob/v1.7.8/config.sample.json#L45 + +### Joining + +Joining a "large", federated room will initially fail with the below message in Element web, but waiting a while (10-60mins) and trying again will succeed without any issue. What counts as "large" is not message history, user count, connections to homeservers or even a simple count of the state events, it is instead how long the state resolution algorithm takes. However, each of those numbers are reasonable proxies, so we can use them as estimates since user count is one of the few things you see before joining. + +![Screenshot_2020-10-02_17-15-06](https://user-images.githubusercontent.com/71895/94945781-18771500-04d3-11eb-8419-83c2da73a341.png) + +This is [#1211](https://github.com/matrix-org/synapse/issues/1211) and will also hopefully be mitigated by peeking [matrix-org/matrix-doc#2753](https://github.com/matrix-org/matrix-doc/pull/2753) so at least you don't need to wait for a join to complete before finding out if it's the kind of room you want. Note that you should first disable presence, otherwise it'll just make the situation worse [#3120](https://github.com/matrix-org/synapse/issues/3120). There is a lot of database interaction too, so make sure you've [migrated your data](../postgres.md) from the default sqlite to postgresql. Personally, I recommend patience - once the initial join is complete there's rarely any issues with actually interacting with the room, but if you like you can just block "large" rooms entirely. + +### Sessions + +Anything that requires modifying the device list [#7721](https://github.com/matrix-org/synapse/issues/7721) will take a while to propagate, again taking the client "Offline" until it's complete. This includes signing in and out, editing the public name and verifying e2ee. The main mitigation I recommend is to keep long-running sessions open e.g. by using Firefox SSB "Use this site in App mode" or Chromium PWA "Install Element". + +### Recommended configuration + +Put the below in a new file at /etc/matrix-synapse/conf.d/sbc.yaml to override the defaults in homeserver.yaml. + +``` +# Set to false to disable presence tracking on this homeserver. +use_presence: false + +# When this is enabled, the room "complexity" will be checked before a user +# joins a new remote room. If it is above the complexity limit, the server will +# disallow joining, or will instantly leave. +limit_remote_rooms: + # Uncomment to enable room complexity checking. + #enabled: true + complexity: 3.0 + +# Database configuration +database: + name: psycopg2 + args: + user: matrix-synapse + # Generate a long, secure one with a password manager + password: hunter2 + database: matrix-synapse + host: localhost + cp_min: 5 + cp_max: 10 +``` + +Currently the complexity is measured by [current_state_events / 500](https://github.com/matrix-org/synapse/blob/v1.20.1/synapse/storage/databases/main/events_worker.py#L986). You can find join times and your most complex rooms like this: + +``` +admin@homeserver:~$ zgrep '/client/r0/join/' /var/log/matrix-synapse/homeserver.log* | awk '{print $18, $25}' | sort --human-numeric-sort +29.922sec/-0.002sec /_matrix/client/r0/join/%23debian-fasttrack%3Apoddery.com +182.088sec/0.003sec /_matrix/client/r0/join/%23decentralizedweb-general%3Amatrix.org +911.625sec/-570.847sec /_matrix/client/r0/join/%23synapse%3Amatrix.org + +admin@homeserver:~$ sudo --user postgres psql matrix-synapse --command 'select canonical_alias, joined_members, current_state_events from room_stats_state natural join room_stats_current where canonical_alias is not null order by current_state_events desc fetch first 5 rows only' + canonical_alias | joined_members | current_state_events +-------------------------------+----------------+---------------------- + #_oftc_#debian:matrix.org | 871 | 52355 + #matrix:matrix.org | 6379 | 10684 + #irc:matrix.org | 461 | 3751 + #decentralizedweb-general:matrix.org | 997 | 1509 + #whatsapp:maunium.net | 554 | 854 +``` \ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md new file mode 100644 index 0000000000..162af0c741 --- /dev/null +++ b/docs/usage/administration/admin_faq.md @@ -0,0 +1,96 @@ +## Admin FAQ + +How do I become a server admin? +--- +If your server already has an admin account you should use the user admin API to promote other accounts to become admins. See [User Admin API](../../admin_api/user_admin_api.md#Change-whether-a-user-is-a-server-administrator-or-not) + +If you don't have any admin accounts yet you won't be able to use the admin API so you'll have to edit the database manually. Manually editing the database is generally not recommended so once you have an admin account, use the admin APIs to make further changes. + + UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'; + +What servers are my server talking to? +--- +Run this sql query on your db: + + SELECT * FROM destinations; + +What servers are currently participating in this room? +--- +Run this sql query on your db: + + SELECT DISTINCT split_part(state_key, ':', 2) + FROM current_state_events AS c + INNER JOIN room_memberships AS m USING (room_id, event_id) + WHERE room_id = '!cURbafjkfsMDVwdRDQ:matrix.org' AND membership = 'join'; + +What users are registered on my server? +--- + + SELECT NAME from users; + +Manually resetting passwords: +--- +See https://github.com/matrix-org/synapse/blob/master/README.rst#password-reset + +I have a problem with my server. Can I just delete my database and start again? +--- +Deleting your database is unlikely to make anything better. + +It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated network: lots of other servers have information about your server. + +For example: other servers might think that you are in a room, your server will think that you are not, and you'll probably be unable to interact with that room in a sensible way ever again. + +In general, there are better solutions to any problem than dropping the database. Come and seek help in https://matrix.to/#/#synapse:matrix.org. + +There are two exceptions when it might be sensible to delete your database and start again: +* You have *never* joined any rooms which are federated with other servers. For instance, a local deployment which the outside world can't talk to. +* You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database. +(In both cases you probably also want to clear out the media_store.) + +I've stuffed up access to my room, how can I delete it to free up the alias? +--- +Using the following curl command: + + curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ + +\ - can be obtained in riot by looking in the riot settings, down the bottom is: +Access Token:\ + +\ - the room alias, eg. #my_room:matrix.org this possibly needs to be URL encoded also, for example %23my_room%3Amatrix.org + +How can I find the lines corresponding to a given HTTP request in my homeserver log? +--- + +Synapse tags each log line according to the HTTP request it is processing. When it finishes processing each request, it logs a line containing the words `Processed request: `. For example: + +``` +2019-02-14 22:35:08,196 - synapse.access.http.8008 - 302 - INFO - GET-37 - ::1 - 8008 - {@richvdh:localhost} Processed request: 0.173sec/0.001sec (0.002sec, 0.000sec) (0.027sec/0.026sec/2) 687B 200 "GET /_matrix/client/r0/sync HTTP/1.1" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" [0 dbevts]" +``` + +Here we can see that the request has been tagged with `GET-37`. (The tag depends on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, `OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do: + +``` +grep 'GET-37' homeserver.log +``` + +If you want to paste that output into a github issue or matrix room, please remember to surround it with triple-backticks (```) to make it legible (see https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code). + + +What do all those fields in the 'Processed' line mean? +--- +See [Request log format](request_log.md). + + +What are the biggest rooms on my server? +--- + +```sql +SELECT s.canonical_alias, g.room_id, count(*) AS num_rows +FROM + state_groups_state AS g, + room_stats_state AS s +WHERE g.room_id = s.room_id +GROUP BY s.canonical_alias, g.room_id +ORDER BY num_rows desc +LIMIT 10; +``` \ No newline at end of file diff --git a/docs/usage/administration/database_maintenance_tools.md b/docs/usage/administration/database_maintenance_tools.md new file mode 100644 index 0000000000..2c29ccfe69 --- /dev/null +++ b/docs/usage/administration/database_maintenance_tools.md @@ -0,0 +1,36 @@ +This blog post by Victor Berger explains how to use many of the tools listed on this page: https://levans.fr/shrink-synapse-database.html + +# List of useful tools and scripts for maintenance Synapse database: + +## [Purge Remote Media API](../../admin_api/media_admin_api.md#purge-remote-media-api) +The purge remote media API allows server admins to purge old cached remote media. + +#### Purge local media + +There is no purge API for local media because you may be the only one with a copy. If you are sure you want to delete local media you could use something like the following to delete media that hasn't been accessed in a while. + +``` +find /path/to/synapse/media_store/local_content -atime +365 -delete +find /path/to/synapse/media_store/local_thumbnails -atime +365 -delete +``` + +This will delete media that hasn't been accessed in 365 days. + +**Warning, check noatime flag**: You also have to double-check that the filesystem where synapse's media store don't have [noatime flag](https://unix.stackexchange.com/questions/219015/how-to-disable-access-time-settings-in-debian-linux/219017#219017). Check it with `mount`, `noatime` is something that is usually enabled by default to reduce read-write operations in the filesystem for a feature is not so demanded. + +## [Purge History API](../../admin_api/purge_history_api.md) +The purge history API allows server admins to purge historic events from their database, reclaiming disk space. + +## [synapse_janitor.sql](https://github.com/xwiki-labs/synapse_scripts) + +- this project is [unmantained and dangerous](https://github.com/xwiki-labs/synapse_scripts#unmaintained-and-dangerous) +- USE WITH CAUTION. It may cause database corruption (see https://github.com/matrix-org/synapse/issues/7305 for example). +- ["Our team hasn't used any of these scripts for some time (possibly years) (...) if anyone wants to volunteer to maintain this repo I'd be happy to transfer ownership.](https://github.com/xwiki-labs/synapse_scripts/pull/12#issuecomment-617275345) + +Cleans a synapse Postgres database of deleted messages and abandoned rooms. + +## [synapse-compress-state](https://github.com/matrix-org/rust-synapse-compress-state) +Tool for compressing (deduplicating) `state_groups_state` table. + +## [SQL for analyzing Synapse PostgreSQL database stats](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats) +Some easy SQL that reports useful stat about Matrix Synapse database. \ No newline at end of file diff --git a/docs/usage/administration/state_groups.md b/docs/usage/administration/state_groups.md new file mode 100644 index 0000000000..f1dee7accf --- /dev/null +++ b/docs/usage/administration/state_groups.md @@ -0,0 +1,25 @@ +# How do State Groups work? + +As a general rule, I encourage people who want to understand the deepest darkest secrets of the database schema to drop by #synapse-dev:matrix.org and ask questions. + +However, one question that comes up frequently is that of how "state groups" work, and why the `state_groups_state` table gets so big, so here's an attempt to answer that question. + +We need to be able to relatively quickly calculate the state of a room at any point in that room's history. In other words, we need to know the state of the room at each event in that room. This is done as follows: + +A sequence of events where the state is the same are grouped together into a `state_group`; the mapping is recorded in `event_to_state_groups`. (Technically speaking, since a state event usually changes the state in the room, we are recording the state of the room *after* the given event id: which is to say, to a handwavey simplification, the first event in a state group is normally a state event, and others in the same state group are normally non-state-events.) + +`state_groups` records, for each state group, the id of the room that we're looking at, and also the id of the first event in that group. (I'm not sure if that event id is used much in practice.) + +Now, if we stored all the room state for each `state_group`, that would be a huge amount of data. Instead, for each state group, we normally store the difference between the state in that group and some other state group, and only occasionally (every 100 state changes or so) record the full state. + +So, most state groups have an entry in `state_group_edges` (don't ask me why it's not a column in `state_groups`) which records the previous state group in the room, and `state_groups_state` records the differences in state since that previous state group. + +A full state group just records the event id for each piece of state in the room at that point. + +## Known bugs with state groups + +There are various reasons that we can end up creating many more state groups than we need: see https://github.com/matrix-org/synapse/issues/3364 for more details. + +## Compression tool + +There is a tool at https://github.com/matrix-org/rust-synapse-compress-state which can compress the `state_groups_state` on a room by-room basis (essentially, it reduces the number of "full" state groups). This can result in dramatic reductions of the storage used. \ No newline at end of file diff --git a/docs/usage/administration/understanding_synapse_through_grafana_graphs.md b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md new file mode 100644 index 0000000000..bfe4360b4e --- /dev/null +++ b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md @@ -0,0 +1,72 @@ +## Understanding Synapse through Grafana graphs + +It is possible monitor much of the internal state of Synapse using [Prometheus](https://prometheus.io) metrics and [Grafana](https://grafana.com/). A guide for configuring Synapse to provide metrics is available [here](../../metrics-howto.md) and information on setting up Grafana is [here](https://github.com/matrix-org/synapse/tree/master/contrib/grafana). In this setup, Prometheus will periodically scrape the information Synapse provides and store a record of it over time. Grafana is then used as an interface to query and present this information through a series of pretty graphs. + +Once you have grafana set up, and assuming you're using [our grafana dashboard template](https://github.com/matrix-org/synapse/blob/master/contrib/grafana/synapse.json), look for the following graphs when debugging a slow/overloaded Synapse: + +## Message Event Send Time + +![image](https://user-images.githubusercontent.com/1342360/82239409-a1c8e900-9930-11ea-8081-e4614e0c63f4.png) + +This, along with the CPU and Memory graphs, is a good way to check the general health of your Synapse instance. It represents how long it takes for a user on your homeserver to send a message. + +## Transaction Count and Transaction Duration + +![image](https://user-images.githubusercontent.com/1342360/82239985-8d392080-9931-11ea-80d0-843ab2f22e1e.png) + +![image](https://user-images.githubusercontent.com/1342360/82240050-ab068580-9931-11ea-98f1-f94671cbac9a.png) + +These graphs show the database transactions that are occurring the most frequently, as well as those are that are taking the most amount of time to execute. + +![image](https://user-images.githubusercontent.com/1342360/82240192-e86b1300-9931-11ea-9aac-3e2c9bfa6fdc.png) + +In the first graph, we can see obvious spikes corresponding to lots of `get_user_by_id` transactions. This would be useful information to figure out which part of the Synapse codebase is potentially creating a heavy load on the system. However, be sure to cross-reference this with Transaction Duration, which states that `get_users_by_id` is actually a very quick database transaction and isn't causing as much load as others, like `persist_events`: + +![image](https://user-images.githubusercontent.com/1342360/82240467-62030100-9932-11ea-8db9-917f2d977fe1.png) + +Still, it's probably worth investigating why we're getting users from the database that often, and whether it's possible to reduce the amount of queries we make by adjusting our cache factor(s). + +The `persist_events` transaction is responsible for saving new room events to the Synapse database, so can often show a high transaction duration. + +## Federation + +The charts in the "Federation" section show information about incoming and outgoing federation requests. Federation data can be divided into two basic types: + +- PDU (Persistent Data Unit) - room events: messages, state events (join/leave), etc. These are permanently stored in the database. +- EDU (Ephemeral Data Unit) - other data, which need not be stored permanently, such as read receipts, typing notifications. + +The "Outgoing EDUs by type" chart shows the EDUs within outgoing federation requests by type: `m.device_list_update`, `m.direct_to_device`, `m.presence`, `m.receipt`, `m.typing`. + +If you see a large number of `m.presence` EDUs and are having trouble with too much CPU load, you can disable `presence` in the Synapse config. See also [#3971](https://github.com/matrix-org/synapse/issues/3971). + +## Caches + +![image](https://user-images.githubusercontent.com/1342360/82240572-8b239180-9932-11ea-96ff-6b5f0e57ebe5.png) + +![image](https://user-images.githubusercontent.com/1342360/82240666-b8703f80-9932-11ea-86af-9f663988d8da.png) + +This is quite a useful graph. It shows how many times Synapse attempts to retrieve a piece of data from a cache which the cache did not contain, thus resulting in a call to the database. We can see here that the `_get_joined_profile_from_event_id` cache is being requested a lot, and often the data we're after is not cached. + +Cross-referencing this with the Eviction Rate graph, which shows that entries are being evicted from `_get_joined_profile_from_event_id` quite often: + +![image](https://user-images.githubusercontent.com/1342360/82240766-de95df80-9932-11ea-8c15-5acfc57c48da.png) + +we should probably consider raising the size of that cache by raising its cache factor (a multiplier value for the size of an individual cache). Information on doing so is available [here](https://github.com/matrix-org/synapse/blob/ee421e524478c1ad8d43741c27379499c2f6135c/docs/sample_config.yaml#L608-L642) (note that the configuration of individual cache factors through the configuration file is available in Synapse v1.14.0+, whereas doing so through environment variables has been supported for a very long time). Note that this will increase Synapse's overall memory usage. + +## Forward Extremities + +![image](https://user-images.githubusercontent.com/1342360/82241440-13566680-9934-11ea-8b88-ba468db937ed.png) + +Forward extremities are the leaf events at the end of a DAG in a room, aka events that have no children. The more exist in a room, the more [state resolution](https://matrix.org/docs/spec/server_server/r0.1.3#room-state-resolution) that Synapse needs to perform (hint: it's an expensive operation). While Synapse has code to prevent too many of these existing at one time in a room, bugs can sometimes make them crop up again. + +If a room has >10 forward extremities, it's worth checking which room is the culprit and potentially removing them using the SQL queries mentioned in [#1760](https://github.com/matrix-org/synapse/issues/1760). + +## Garbage Collection + +![image](https://user-images.githubusercontent.com/1342360/82241911-da6ac180-9934-11ea-9a0d-a311fe22acd0.png) + +Large spikes in garbage collection times (bigger than shown here, I'm talking in the multiple seconds range), can cause lots of problems in Synapse performance. It's more an indicator of problems, and a symptom of other problems though, so check other graphs for what might be causing it. + +## Final Thoughts + +If you're still having performance problems with your Synapse instance and you've tried everything you can, it may just be a lack of system resources. Consider adding more CPU and RAM, and make use of [worker mode](../../workers.md) to make use of multiple CPU cores / multiple machines for your homeserver. \ No newline at end of file diff --git a/docs/usage/administration/useful_sql_for_admins.md b/docs/usage/administration/useful_sql_for_admins.md new file mode 100644 index 0000000000..d4aada3272 --- /dev/null +++ b/docs/usage/administration/useful_sql_for_admins.md @@ -0,0 +1,156 @@ +## Some useful SQL queries for Synapse Admins + +## Size of full matrix db +`SELECT pg_size_pretty( pg_database_size( 'matrix' ) );` +### Result example: +``` +pg_size_pretty +---------------- + 6420 MB +(1 row) +``` +## Show top 20 larger rooms by state events count +```sql +SELECT r.name, s.room_id, s.current_state_events + FROM room_stats_current s + LEFT JOIN room_stats_state r USING (room_id) + ORDER BY current_state_events DESC + LIMIT 20; +``` + +and by state_group_events count: +```sql +SELECT rss.name, s.room_id, count(s.room_id) FROM state_groups_state s +LEFT JOIN room_stats_state rss USING (room_id) +GROUP BY s.room_id, rss.name +ORDER BY count(s.room_id) DESC +LIMIT 20; +``` +plus same, but with join removed for performance reasons: +```sql +SELECT s.room_id, count(s.room_id) FROM state_groups_state s +GROUP BY s.room_id +ORDER BY count(s.room_id) DESC +LIMIT 20; +``` + +## Show top 20 larger tables by row count +```sql +SELECT relname, n_live_tup as rows + FROM pg_stat_user_tables + ORDER BY n_live_tup DESC + LIMIT 20; +``` +This query is quick, but may be very approximate, for exact number of rows use `SELECT COUNT(*) FROM `. +### Result example: +``` +state_groups_state - 161687170 +event_auth - 8584785 +event_edges - 6995633 +event_json - 6585916 +event_reference_hashes - 6580990 +events - 6578879 +received_transactions - 5713989 +event_to_state_groups - 4873377 +stream_ordering_to_exterm - 4136285 +current_state_delta_stream - 3770972 +event_search - 3670521 +state_events - 2845082 +room_memberships - 2785854 +cache_invalidation_stream - 2448218 +state_groups - 1255467 +state_group_edges - 1229849 +current_state_events - 1222905 +users_in_public_rooms - 364059 +device_lists_stream - 326903 +user_directory_search - 316433 +``` + +## Show top 20 rooms by new events count in last 1 day: +```sql +SELECT e.room_id, r.name, COUNT(e.event_id) cnt FROM events e +LEFT JOIN room_stats_state r USING (room_id) +WHERE e.origin_server_ts >= DATE_PART('epoch', NOW() - INTERVAL '1 day') * 1000 GROUP BY e.room_id, r.name ORDER BY cnt DESC LIMIT 20; +``` + +## Show top 20 users on homeserver by sent events (messages) at last month: +```sql +SELECT user_id, SUM(total_events) + FROM user_stats_historical + WHERE TO_TIMESTAMP(end_ts/1000) AT TIME ZONE 'UTC' > date_trunc('day', now() - interval '1 month') + GROUP BY user_id + ORDER BY SUM(total_events) DESC + LIMIT 20; +``` + +## Show last 100 messages from needed user, with room names: +```sql +SELECT e.room_id, r.name, e.event_id, e.type, e.content, j.json FROM events e + LEFT JOIN event_json j USING (room_id) + LEFT JOIN room_stats_state r USING (room_id) + WHERE sender = '@LOGIN:example.com' + AND e.type = 'm.room.message' + ORDER BY stream_ordering DESC + LIMIT 100; +``` + +## Show top 20 larger tables by storage size +```sql +SELECT nspname || '.' || relname AS "relation", + pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size" + FROM pg_class C + LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) + WHERE nspname NOT IN ('pg_catalog', 'information_schema') + AND C.relkind <> 'i' + AND nspname !~ '^pg_toast' + ORDER BY pg_total_relation_size(C.oid) DESC + LIMIT 20; +``` +### Result example: +``` +public.state_groups_state - 27 GB +public.event_json - 9855 MB +public.events - 3675 MB +public.event_edges - 3404 MB +public.received_transactions - 2745 MB +public.event_reference_hashes - 1864 MB +public.event_auth - 1775 MB +public.stream_ordering_to_exterm - 1663 MB +public.event_search - 1370 MB +public.room_memberships - 1050 MB +public.event_to_state_groups - 948 MB +public.current_state_delta_stream - 711 MB +public.state_events - 611 MB +public.presence_stream - 530 MB +public.current_state_events - 525 MB +public.cache_invalidation_stream - 466 MB +public.receipts_linearized - 279 MB +public.state_groups - 160 MB +public.device_lists_remote_cache - 124 MB +public.state_group_edges - 122 MB +``` + +## Show rooms with names, sorted by events in this rooms +`echo "select event_json.room_id,room_stats_state.name from event_json,room_stats_state where room_stats_state.room_id=event_json.room_id" | psql synapse | sort | uniq -c | sort -n` +### Result example: +``` + 9459 !FPUfgzXYWTKgIrwKxW:matrix.org | This Week in Matrix + 9459 !FPUfgzXYWTKgIrwKxW:matrix.org | This Week in Matrix (TWIM) + 17799 !iDIOImbmXxwNngznsa:matrix.org | Linux in Russian + 18739 !GnEEPYXUhoaHbkFBNX:matrix.org | Riot Android + 23373 !QtykxKocfZaZOUrTwp:matrix.org | Matrix HQ + 39504 !gTQfWzbYncrtNrvEkB:matrix.org | ru.[matrix] + 43601 !iNmaIQExDMeqdITdHH:matrix.org | Riot + 43601 !iNmaIQExDMeqdITdHH:matrix.org | Riot Web/Desktop +``` + +## Lookup room state info by list of room_id +```sql +SELECT rss.room_id, rss.name, rss.canonical_alias, rss.topic, rss.encryption, rsc.joined_members, rsc.local_users_in_room, rss.join_rules +FROM room_stats_state rss +LEFT JOIN room_stats_current rsc USING (room_id) +WHERE room_id IN (WHERE room_id IN ( + '!OGEhHVWSdvArJzumhm:matrix.org', + '!YTvKGNlinIzlkMTVRl:matrix.org' +) +``` \ No newline at end of file -- cgit 1.5.1 From 49e1356ee3d5d72929c91f778b3a231726c1413c Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 2 Dec 2021 09:46:20 -0800 Subject: Minor cleanup on recently ported doc pages (#11466) * move wiki pages to synapse/docs and add a few titles where necessary * update SUMMARY.md with added pages * add changelog * move incorrectly located newsfragment * update changelog number * snake case added files and update summary.md accordingly * update issue/pr links * update relative links to docs * update changelog to indicate that we moved wiki pages to the docs and state reasoning * requested changes to admin_faq.md * requested changes to database_maintenance_tools.md * requested changes to understanding_synapse_through_graphana_graphs.md * add changelog * fix leftover merge errata * fix unwanted changes from merge * use two spaces between entries * outdent code blocks --- CHANGES.md | 6 ++-- changelog.d/11466.misc | 1 + docs/usage/administration/admin_faq.md | 41 +++++++++++++--------- .../administration/database_maintenance_tools.md | 26 +++----------- ...understanding_synapse_through_grafana_graphs.md | 20 ++++++++--- 5 files changed, 48 insertions(+), 46 deletions(-) create mode 100644 changelog.d/11466.misc (limited to 'docs') diff --git a/CHANGES.md b/CHANGES.md index 2e3f0da961..f398b8e9c5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,9 @@ UNRELEASED ========== -We've decided to move the existing, somewhat stagnant pages from the GitHub wiki +We've decided to move the existing, somewhat stagnant pages from the GitHub wiki to the [documentation website](https://matrix-org.github.io/synapse/latest/). + This was done for two reasons. The first was to ensure that changes are checked by multiple authors before being committed (everyone makes mistakes!) and the second was visibility of the documentation. Not everyone knows that Synapse has some very @@ -10,7 +11,7 @@ useful information hidden away in its GitHub wiki pages. Bringing them to the documentation website should help with visibility, as well as keep all Synapse documentation in one, easily-searchable location. -Note that contributions to the documentation website happen through [GitHub pull +Note that contributions to the documentation website happen through [GitHub pull requests](https://github.com/matrix-org/synapse/pulls). Please visit [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) if you need help with the process! @@ -102,7 +103,6 @@ Internal Changes - Refactor including the bundled relations when serializing an event. ([\#11408](https://github.com/matrix-org/synapse/issues/11408)) - Synapse 1.47.1 (2021-11-23) =========================== diff --git a/changelog.d/11466.misc b/changelog.d/11466.misc new file mode 100644 index 0000000000..4317d017d7 --- /dev/null +++ b/changelog.d/11466.misc @@ -0,0 +1 @@ +Update and clean up recently ported documentation pages. \ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 162af0c741..3dcad4bbef 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -6,27 +6,31 @@ If your server already has an admin account you should use the user admin API to If you don't have any admin accounts yet you won't be able to use the admin API so you'll have to edit the database manually. Manually editing the database is generally not recommended so once you have an admin account, use the admin APIs to make further changes. - UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'; - +```sql +UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'; +``` What servers are my server talking to? --- Run this sql query on your db: - - SELECT * FROM destinations; +```sql +SELECT * FROM destinations; +``` What servers are currently participating in this room? --- Run this sql query on your db: - - SELECT DISTINCT split_part(state_key, ':', 2) - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - WHERE room_id = '!cURbafjkfsMDVwdRDQ:matrix.org' AND membership = 'join'; +```sql +SELECT DISTINCT split_part(state_key, ':', 2) + FROM current_state_events AS c + INNER JOIN room_memberships AS m USING (room_id, event_id) + WHERE room_id = '!cURbafjkfsMDVwdRDQ:matrix.org' AND membership = 'join'; +``` What users are registered on my server? --- - - SELECT NAME from users; +```sql +SELECT NAME from users; +``` Manually resetting passwords: --- @@ -50,13 +54,13 @@ There are two exceptions when it might be sensible to delete your database and s I've stuffed up access to my room, how can I delete it to free up the alias? --- Using the following curl command: - - curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ - -\ - can be obtained in riot by looking in the riot settings, down the bottom is: +``` +curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ +``` +`` - can be obtained in riot by looking in the riot settings, down the bottom is: Access Token:\ -\ - the room alias, eg. #my_room:matrix.org this possibly needs to be URL encoded also, for example %23my_room%3Amatrix.org +`` - the room alias, eg. #my_room:matrix.org this possibly needs to be URL encoded also, for example %23my_room%3Amatrix.org How can I find the lines corresponding to a given HTTP request in my homeserver log? --- @@ -93,4 +97,7 @@ WHERE g.room_id = s.room_id GROUP BY s.canonical_alias, g.room_id ORDER BY num_rows desc LIMIT 10; -``` \ No newline at end of file +``` + +You can also use the [List Room API](../../admin_api/rooms.md#list-room-api) +and `order_by` `state_events`. diff --git a/docs/usage/administration/database_maintenance_tools.md b/docs/usage/administration/database_maintenance_tools.md index 2c29ccfe69..92b805d413 100644 --- a/docs/usage/administration/database_maintenance_tools.md +++ b/docs/usage/administration/database_maintenance_tools.md @@ -5,32 +5,14 @@ This blog post by Victor Berger explains how to use many of the tools listed on ## [Purge Remote Media API](../../admin_api/media_admin_api.md#purge-remote-media-api) The purge remote media API allows server admins to purge old cached remote media. -#### Purge local media - -There is no purge API for local media because you may be the only one with a copy. If you are sure you want to delete local media you could use something like the following to delete media that hasn't been accessed in a while. - -``` -find /path/to/synapse/media_store/local_content -atime +365 -delete -find /path/to/synapse/media_store/local_thumbnails -atime +365 -delete -``` - -This will delete media that hasn't been accessed in 365 days. - -**Warning, check noatime flag**: You also have to double-check that the filesystem where synapse's media store don't have [noatime flag](https://unix.stackexchange.com/questions/219015/how-to-disable-access-time-settings-in-debian-linux/219017#219017). Check it with `mount`, `noatime` is something that is usually enabled by default to reduce read-write operations in the filesystem for a feature is not so demanded. +## [Purge Local Media API](../../admin_api/media_admin_api.md#delete-local-media) +This API deletes the *local* media from the disk of your own server. ## [Purge History API](../../admin_api/purge_history_api.md) The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -## [synapse_janitor.sql](https://github.com/xwiki-labs/synapse_scripts) - -- this project is [unmantained and dangerous](https://github.com/xwiki-labs/synapse_scripts#unmaintained-and-dangerous) -- USE WITH CAUTION. It may cause database corruption (see https://github.com/matrix-org/synapse/issues/7305 for example). -- ["Our team hasn't used any of these scripts for some time (possibly years) (...) if anyone wants to volunteer to maintain this repo I'd be happy to transfer ownership.](https://github.com/xwiki-labs/synapse_scripts/pull/12#issuecomment-617275345) - -Cleans a synapse Postgres database of deleted messages and abandoned rooms. - ## [synapse-compress-state](https://github.com/matrix-org/rust-synapse-compress-state) Tool for compressing (deduplicating) `state_groups_state` table. -## [SQL for analyzing Synapse PostgreSQL database stats](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats) -Some easy SQL that reports useful stat about Matrix Synapse database. \ No newline at end of file +## [SQL for analyzing Synapse PostgreSQL database stats](useful_sql_for_admins.md) +Some easy SQL that reports useful stats about your Synapse database. \ No newline at end of file diff --git a/docs/usage/administration/understanding_synapse_through_grafana_graphs.md b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md index bfe4360b4e..c365cc3923 100644 --- a/docs/usage/administration/understanding_synapse_through_grafana_graphs.md +++ b/docs/usage/administration/understanding_synapse_through_grafana_graphs.md @@ -1,6 +1,12 @@ ## Understanding Synapse through Grafana graphs -It is possible monitor much of the internal state of Synapse using [Prometheus](https://prometheus.io) metrics and [Grafana](https://grafana.com/). A guide for configuring Synapse to provide metrics is available [here](../../metrics-howto.md) and information on setting up Grafana is [here](https://github.com/matrix-org/synapse/tree/master/contrib/grafana). In this setup, Prometheus will periodically scrape the information Synapse provides and store a record of it over time. Grafana is then used as an interface to query and present this information through a series of pretty graphs. +It is possible to monitor much of the internal state of Synapse using [Prometheus](https://prometheus.io) +metrics and [Grafana](https://grafana.com/). +A guide for configuring Synapse to provide metrics is available [here](../../metrics-howto.md) +and information on setting up Grafana is [here](https://github.com/matrix-org/synapse/tree/master/contrib/grafana). +In this setup, Prometheus will periodically scrape the information Synapse provides and +store a record of it over time. Grafana is then used as an interface to query and +present this information through a series of pretty graphs. Once you have grafana set up, and assuming you're using [our grafana dashboard template](https://github.com/matrix-org/synapse/blob/master/contrib/grafana/synapse.json), look for the following graphs when debugging a slow/overloaded Synapse: @@ -57,7 +63,7 @@ we should probably consider raising the size of that cache by raising its cache ![image](https://user-images.githubusercontent.com/1342360/82241440-13566680-9934-11ea-8b88-ba468db937ed.png) -Forward extremities are the leaf events at the end of a DAG in a room, aka events that have no children. The more exist in a room, the more [state resolution](https://matrix.org/docs/spec/server_server/r0.1.3#room-state-resolution) that Synapse needs to perform (hint: it's an expensive operation). While Synapse has code to prevent too many of these existing at one time in a room, bugs can sometimes make them crop up again. +Forward extremities are the leaf events at the end of a DAG in a room, aka events that have no children. The more that exist in a room, the more [state resolution](https://spec.matrix.org/v1.1/server-server-api/#room-state-resolution) that Synapse needs to perform (hint: it's an expensive operation). While Synapse has code to prevent too many of these existing at one time in a room, bugs can sometimes make them crop up again. If a room has >10 forward extremities, it's worth checking which room is the culprit and potentially removing them using the SQL queries mentioned in [#1760](https://github.com/matrix-org/synapse/issues/1760). @@ -65,8 +71,14 @@ If a room has >10 forward extremities, it's worth checking which room is the cul ![image](https://user-images.githubusercontent.com/1342360/82241911-da6ac180-9934-11ea-9a0d-a311fe22acd0.png) -Large spikes in garbage collection times (bigger than shown here, I'm talking in the multiple seconds range), can cause lots of problems in Synapse performance. It's more an indicator of problems, and a symptom of other problems though, so check other graphs for what might be causing it. +Large spikes in garbage collection times (bigger than shown here, I'm talking in the +multiple seconds range), can cause lots of problems in Synapse performance. It's more an +indicator of problems, and a symptom of other problems though, so check other graphs for what might be causing it. ## Final Thoughts -If you're still having performance problems with your Synapse instance and you've tried everything you can, it may just be a lack of system resources. Consider adding more CPU and RAM, and make use of [worker mode](../../workers.md) to make use of multiple CPU cores / multiple machines for your homeserver. \ No newline at end of file +If you're still having performance problems with your Synapse instance and you've +tried everything you can, it may just be a lack of system resources. Consider adding +more CPU and RAM, and make use of [worker mode](../../workers.md) +to make use of multiple CPU cores / multiple machines for your homeserver. + -- cgit 1.5.1 From 4eb77965cd016181d2111f37d93526e9bb0434f0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 3 Dec 2021 18:25:04 -0600 Subject: Update backward extremity docs to make it clear that it does not indicate whether we have fetched an events' `prev_events` (#11469) Spawning from https://github.com/matrix-org/synapse/pull/9445#discussion_r758958181 Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/11469.doc | 1 + docs/development/room-dag-concepts.md | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 changelog.d/11469.doc (limited to 'docs') diff --git a/changelog.d/11469.doc b/changelog.d/11469.doc new file mode 100644 index 0000000000..51602aa968 --- /dev/null +++ b/changelog.d/11469.doc @@ -0,0 +1 @@ +Update section about backward extremities in the room DAG concepts doc to correct the misconception about backward extremities indicating whether we have fetched an events' `prev_events`. diff --git a/docs/development/room-dag-concepts.md b/docs/development/room-dag-concepts.md index 5eed72bec6..cbc7cf2949 100644 --- a/docs/development/room-dag-concepts.md +++ b/docs/development/room-dag-concepts.md @@ -38,16 +38,15 @@ Most-recent-in-time events in the DAG which are not referenced by any other even The forward extremities of a room are used as the `prev_events` when the next event is sent. -## Backwards extremity +## Backward extremity The current marker of where we have backfilled up to and will generally be the -oldest-in-time events we know of in the DAG. +`prev_events` of the oldest-in-time events we have in the DAG. This gives a starting point when +backfilling history. -This is an event where we haven't fetched all of the `prev_events` for. - -Once we have fetched all of its `prev_events`, it's unmarked as a backwards -extremity (although we may have formed new backwards extremities from the prev -events during the backfilling process). +When we persist a non-outlier event, we clear it as a backward extremity and set +all of its `prev_events` as the new backward extremities if they aren't already +persisted in the `events` table. ## Outliers @@ -56,8 +55,7 @@ We mark an event as an `outlier` when we haven't figured out the state for the room at that point in the DAG yet. We won't *necessarily* have the `prev_events` of an `outlier` in the database, -but it's entirely possible that we *might*. The status of whether we have all of -the `prev_events` is marked as a [backwards extremity](#backwards-extremity). +but it's entirely possible that we *might*. For example, when we fetch the event auth chain or state for a given event, we mark all of those claimed auth events as outliers because we haven't done the -- cgit 1.5.1 From 8b4b153c9e86c04c7db8c74fde4b6a04becbc461 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 6 Dec 2021 17:59:50 +0100 Subject: Add admin API to get some information about federation status (#11407) --- changelog.d/11407.feature | 1 + docs/SUMMARY.md | 1 + docs/usage/administration/admin_api/federation.md | 114 ++++++ synapse/rest/admin/__init__.py | 6 + synapse/rest/admin/federation.py | 135 +++++++ synapse/storage/databases/main/transactions.py | 70 ++++ tests/rest/admin/test_federation.py | 456 ++++++++++++++++++++++ 7 files changed, 783 insertions(+) create mode 100644 changelog.d/11407.feature create mode 100644 docs/usage/administration/admin_api/federation.md create mode 100644 synapse/rest/admin/federation.py create mode 100644 tests/rest/admin/test_federation.py (limited to 'docs') diff --git a/changelog.d/11407.feature b/changelog.d/11407.feature new file mode 100644 index 0000000000..1d21bde98f --- /dev/null +++ b/changelog.d/11407.feature @@ -0,0 +1 @@ +Add admin API to get some information about federation status with remote servers. \ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 41c8f0fbc9..b05af6d690 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -65,6 +65,7 @@ - [Statistics](admin_api/statistics.md) - [Users](admin_api/user_admin_api.md) - [Server Version](admin_api/version_api.md) + - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) diff --git a/docs/usage/administration/admin_api/federation.md b/docs/usage/administration/admin_api/federation.md new file mode 100644 index 0000000000..8f9535f57b --- /dev/null +++ b/docs/usage/administration/admin_api/federation.md @@ -0,0 +1,114 @@ +# Federation API + +This API allows a server administrator to manage Synapse's federation with other homeservers. + +Note: This API is new, experimental and "subject to change". + +## List of destinations + +This API gets the current destination retry timing info for all remote servers. + +The list contains all the servers with which the server federates, +regardless of whether an error occurred or not. +If an error occurs, it may take up to 20 minutes for the error to be displayed here, +as a complete retry must have failed. + +The API is: + +A standard request with no filtering: + +``` +GET /_synapse/admin/v1/federation/destinations +``` + +A response body like the following is returned: + +```json +{ + "destinations":[ + { + "destination": "matrix.org", + "retry_last_ts": 1557332397936, + "retry_interval": 3000000, + "failure_ts": 1557329397936, + "last_successful_stream_ordering": null + } + ], + "total": 1 +} +``` + +To paginate, check for `next_token` and if present, call the endpoint again +with `from` set to the value of `next_token`. This will return a new page. + +If the endpoint does not return a `next_token` then there are no more destinations +to paginate through. + +**Parameters** + +The following query parameters are available: + +- `from` - Offset in the returned list. Defaults to `0`. +- `limit` - Maximum amount of destinations to return. Defaults to `100`. +- `order_by` - The method in which to sort the returned list of destinations. + Valid values are: + - `destination` - Destinations are ordered alphabetically by remote server name. + This is the default. + - `retry_last_ts` - Destinations are ordered by time of last retry attempt in ms. + - `retry_interval` - Destinations are ordered by how long until next retry in ms. + - `failure_ts` - Destinations are ordered by when the server started failing in ms. + - `last_successful_stream_ordering` - Destinations are ordered by the stream ordering + of the most recent successfully-sent PDU. +- `dir` - Direction of room order. Either `f` for forwards or `b` for backwards. Setting + this value to `b` will reverse the above sort order. Defaults to `f`. + +*Caution:* The database only has an index on the column `destination`. +This means that if a different sort order is used, +this can cause a large load on the database, especially for large environments. + +**Response** + +The following fields are returned in the JSON response body: + +- `destinations` - An array of objects, each containing information about a destination. + Destination objects contain the following fields: + - `destination` - string - Name of the remote server to federate. + - `retry_last_ts` - integer - The last time Synapse tried and failed to reach the + remote server, in ms. This is `0` if the last attempt to communicate with the + remote server was successful. + - `retry_interval` - integer - How long since the last time Synapse tried to reach + the remote server before trying again, in ms. This is `0` if no further retrying occuring. + - `failure_ts` - nullable integer - The first time Synapse tried and failed to reach the + remote server, in ms. This is `null` if communication with the remote server has never failed. + - `last_successful_stream_ordering` - nullable integer - The stream ordering of the most + recent successfully-sent [PDU](understanding_synapse_through_grafana_graphs.md#federation) + to this destination, or `null` if this information has not been tracked yet. +- `next_token`: string representing a positive integer - Indication for pagination. See above. +- `total` - integer - Total number of destinations. + +# Destination Details API + +This API gets the retry timing info for a specific remote server. + +The API is: + +``` +GET /_synapse/admin/v1/federation/destinations/ +``` + +A response body like the following is returned: + +```json +{ + "destination": "matrix.org", + "retry_last_ts": 1557332397936, + "retry_interval": 3000000, + "failure_ts": 1557329397936, + "last_successful_stream_ordering": null +} +``` + +**Response** + +The response fields are the same like in the `destinations` array in +[List of destinations](#list-of-destinations) response. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index c51a029bf3..c499afd4be 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -40,6 +40,10 @@ from synapse.rest.admin.event_reports import ( EventReportDetailRestServlet, EventReportsRestServlet, ) +from synapse.rest.admin.federation import ( + DestinationsRestServlet, + ListDestinationsRestServlet, +) from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.registration_tokens import ( @@ -261,6 +265,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ListRegistrationTokensRestServlet(hs).register(http_server) NewRegistrationTokenRestServlet(hs).register(http_server) RegistrationTokenRestServlet(hs).register(http_server) + DestinationsRestServlet(hs).register(http_server) + ListDestinationsRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. if hs.config.worker.worker_app is None: diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py new file mode 100644 index 0000000000..744687be35 --- /dev/null +++ b/synapse/rest/admin/federation.py @@ -0,0 +1,135 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.http.servlet import RestServlet, parse_integer, parse_string +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.storage.databases.main.transactions import DestinationSortOrder +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ListDestinationsRestServlet(RestServlet): + """Get request to list all destinations. + This needs user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations?from=0&limit=10 + + returns: + 200 OK with list of destinations if success otherwise an error. + + The parameters `from` and `limit` are required only for pagination. + By default, a `limit` of 100 is used. + The parameter `destination` can be used to filter by destination. + The parameter `order_by` can be used to order the result. + """ + + PATTERNS = admin_patterns("/federation/destinations$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + destination = parse_string(request, "destination") + + order_by = parse_string( + request, + "order_by", + default=DestinationSortOrder.DESTINATION.value, + allowed_values=[dest.value for dest in DestinationSortOrder], + ) + + direction = parse_string(request, "dir", default="f", allowed_values=("f", "b")) + + destinations, total = await self._store.get_destinations_paginate( + start, limit, destination, order_by, direction + ) + response = {"destinations": destinations, "total": total} + if (start + limit) < total: + response["next_token"] = str(start + len(destinations)) + + return HTTPStatus.OK, response + + +class DestinationsRestServlet(RestServlet): + """Get details of a destination. + This needs user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations/ + + returns: + 200 OK with details of a destination if success otherwise an error. + """ + + PATTERNS = admin_patterns("/federation/destinations/(?P[^/]+)$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, destination: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + destination_retry_timings = await self._store.get_destination_retry_timings( + destination + ) + + if not destination_retry_timings: + raise NotFoundError("Unknown destination") + + last_successful_stream_ordering = ( + await self._store.get_destination_last_successful_stream_ordering( + destination + ) + ) + + response = { + "destination": destination, + "failure_ts": destination_retry_timings.failure_ts, + "retry_last_ts": destination_retry_timings.retry_last_ts, + "retry_interval": destination_retry_timings.retry_interval, + "last_successful_stream_ordering": last_successful_stream_ordering, + } + + return HTTPStatus.OK, response diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index d7dc1f73ac..1622822552 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -14,6 +14,7 @@ import logging from collections import namedtuple +from enum import Enum from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple import attr @@ -44,6 +45,16 @@ _UpdateTransactionRow = namedtuple( ) +class DestinationSortOrder(Enum): + """Enum to define the sorting method used when returning destinations.""" + + DESTINATION = "destination" + RETRY_LAST_TS = "retry_last_ts" + RETTRY_INTERVAL = "retry_interval" + FAILURE_TS = "failure_ts" + LAST_SUCCESSFUL_STREAM_ORDERING = "last_successful_stream_ordering" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class DestinationRetryTimings: """The current destination retry timing info for a remote server.""" @@ -480,3 +491,62 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): destinations = [row[0] for row in txn] return destinations + + async def get_destinations_paginate( + self, + start: int, + limit: int, + destination: Optional[str] = None, + order_by: str = DestinationSortOrder.DESTINATION.value, + direction: str = "f", + ) -> Tuple[List[JsonDict], int]: + """Function to retrieve a paginated list of destinations. + This will return a json list of destinations and the + total number of destinations matching the filter criteria. + + Args: + start: start number to begin the query from + limit: number of rows to retrieve + destination: search string in destination + order_by: the sort order of the returned list + direction: sort ascending or descending + Returns: + A tuple of a list of mappings from destination to information + and a count of total destinations. + """ + + def get_destinations_paginate_txn( + txn: LoggingTransaction, + ) -> Tuple[List[JsonDict], int]: + order_by_column = DestinationSortOrder(order_by).value + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + args = [] + where_statement = "" + if destination: + args.extend(["%" + destination.lower() + "%"]) + where_statement = "WHERE LOWER(destination) LIKE ?" + + sql_base = f"FROM destinations {where_statement} " + sql = f"SELECT COUNT(*) as total_destinations {sql_base}" + txn.execute(sql, args) + count = txn.fetchone()[0] + + sql = f""" + SELECT destination, retry_last_ts, retry_interval, failure_ts, + last_successful_stream_ordering + {sql_base} + ORDER BY {order_by_column} {order}, destination ASC + LIMIT ? OFFSET ? + """ + txn.execute(sql, args + [limit, start]) + destinations = self.db_pool.cursor_to_dict(txn) + return destinations, count + + return await self.db_pool.runInteraction( + "get_destinations_paginate_txn", get_destinations_paginate_txn + ) diff --git a/tests/rest/admin/test_federation.py b/tests/rest/admin/test_federation.py new file mode 100644 index 0000000000..5188499ef2 --- /dev/null +++ b/tests/rest/admin/test_federation.py @@ -0,0 +1,456 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from http import HTTPStatus +from typing import List, Optional + +from parameterized import parameterized + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client import login +from synapse.server import HomeServer +from synapse.types import JsonDict + +from tests import unittest + + +class FederationTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs: HomeServer): + self.store = hs.get_datastore() + self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.url = "/_synapse/admin/v1/federation/destinations" + + @parameterized.expand( + [ + ("/_synapse/admin/v1/federation/destinations",), + ("/_synapse/admin/v1/federation/destinations/dummy",), + ] + ) + def test_requester_is_no_admin(self, url: str): + """ + If the user is not a server admin, an error 403 is returned. + """ + + self.register_user("user", "pass", admin=False) + other_user_tok = self.login("user", "pass") + + channel = self.make_request( + "GET", + url, + content={}, + access_token=other_user_tok, + ) + + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + + # negative limit + channel = self.make_request( + "GET", + self.url + "?limit=-5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative from + channel = self.make_request( + "GET", + self.url + "?from=-5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # unkown order_by + channel = self.make_request( + "GET", + self.url + "?order_by=bar", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # invalid search order + channel = self.make_request( + "GET", + self.url + "?dir=bar", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # invalid destination + channel = self.make_request( + "GET", + self.url + "/dummy", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_limit(self): + """ + Testing list of destinations with limit + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?limit=5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 5) + self.assertEqual(channel.json_body["next_token"], "5") + self._check_fields(channel.json_body["destinations"]) + + def test_from(self): + """ + Testing list of destinations with a defined starting point (from) + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?from=5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 15) + self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["destinations"]) + + def test_limit_and_from(self): + """ + Testing list of destinations with a defined starting point and limit + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?from=5&limit=10", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(channel.json_body["next_token"], "15") + self.assertEqual(len(channel.json_body["destinations"]), 10) + self._check_fields(channel.json_body["destinations"]) + + def test_next_token(self): + """ + Testing that `next_token` appears at the right place + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + # `next_token` does not appear + # Number of results is the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=20", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), number_destinations) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does not appear + # Number of max results is larger than the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=21", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), number_destinations) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does appear + # Number of max results is smaller than the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=19", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 19) + self.assertEqual(channel.json_body["next_token"], "19") + + # Check + # Set `from` to value of `next_token` for request remaining entries + # `next_token` does not appear + channel = self.make_request( + "GET", + self.url + "?from=19", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 1) + self.assertNotIn("next_token", channel.json_body) + + def test_list_all_destinations(self): + """ + List all destinations. + """ + number_destinations = 5 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url, + {}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(number_destinations, len(channel.json_body["destinations"])) + self.assertEqual(number_destinations, channel.json_body["total"]) + + # Check that all fields are available + self._check_fields(channel.json_body["destinations"]) + + def test_order_by(self): + """ + Testing order list with parameter `order_by` + """ + + def _order_test( + expected_destination_list: List[str], + order_by: Optional[str], + dir: Optional[str] = None, + ): + """Request the list of destinations in a certain order. + Assert that order is what we expect + + Args: + expected_destination_list: The list of user_id in the order + we expect to get back from the server + order_by: The type of ordering to give the server + dir: The direction of ordering to give the server + """ + + url = f"{self.url}?" + if order_by is not None: + url += f"order_by={order_by}&" + if dir is not None and dir in ("b", "f"): + url += f"dir={dir}" + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], len(expected_destination_list)) + + returned_order = [ + row["destination"] for row in channel.json_body["destinations"] + ] + self.assertEqual(expected_destination_list, returned_order) + self._check_fields(channel.json_body["destinations"]) + + # create destinations + dest = [ + ("sub-a.example.com", 100, 300, 200, 300), + ("sub-b.example.com", 200, 200, 100, 100), + ("sub-c.example.com", 300, 100, 300, 200), + ] + for ( + destination, + failure_ts, + retry_last_ts, + retry_interval, + last_successful_stream_ordering, + ) in dest: + self.get_success( + self.store.set_destination_retry_timings( + destination, failure_ts, retry_last_ts, retry_interval + ) + ) + self.get_success( + self.store.set_destination_last_successful_stream_ordering( + destination, last_successful_stream_ordering + ) + ) + + # order by default (destination) + _order_test([dest[0][0], dest[1][0], dest[2][0]], None) + _order_test([dest[0][0], dest[1][0], dest[2][0]], None, "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], None, "b") + + # order by destination + _order_test([dest[0][0], dest[1][0], dest[2][0]], "destination") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "destination", "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "destination", "b") + + # order by failure_ts + _order_test([dest[0][0], dest[1][0], dest[2][0]], "failure_ts") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "failure_ts", "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "failure_ts", "b") + + # order by retry_last_ts + _order_test([dest[2][0], dest[1][0], dest[0][0]], "retry_last_ts") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "retry_last_ts", "f") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "retry_last_ts", "b") + + # order by retry_interval + _order_test([dest[1][0], dest[0][0], dest[2][0]], "retry_interval") + _order_test([dest[1][0], dest[0][0], dest[2][0]], "retry_interval", "f") + _order_test([dest[2][0], dest[0][0], dest[1][0]], "retry_interval", "b") + + # order by last_successful_stream_ordering + _order_test( + [dest[1][0], dest[2][0], dest[0][0]], "last_successful_stream_ordering" + ) + _order_test( + [dest[1][0], dest[2][0], dest[0][0]], "last_successful_stream_ordering", "f" + ) + _order_test( + [dest[0][0], dest[2][0], dest[1][0]], "last_successful_stream_ordering", "b" + ) + + def test_search_term(self): + """Test that searching for a destination works correctly""" + + def _search_test( + expected_destination: Optional[str], + search_term: str, + ): + """Search for a destination and check that the returned destinationis a match + + Args: + expected_destination: The room_id expected to be returned by the API. + Set to None to expect zero results for the search + search_term: The term to search for room names with + """ + url = f"{self.url}?destination={search_term}" + channel = self.make_request( + "GET", + url.encode("ascii"), + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + + # Check that destinations were returned + self.assertTrue("destinations" in channel.json_body) + self._check_fields(channel.json_body["destinations"]) + destinations = channel.json_body["destinations"] + + # Check that the expected number of destinations were returned + expected_destination_count = 1 if expected_destination else 0 + self.assertEqual(len(destinations), expected_destination_count) + self.assertEqual(channel.json_body["total"], expected_destination_count) + + if expected_destination: + # Check that the first returned destination is correct + self.assertEqual(expected_destination, destinations[0]["destination"]) + + number_destinations = 3 + self._create_destinations(number_destinations) + + # Test searching + _search_test("sub0.example.com", "0") + _search_test("sub0.example.com", "sub0") + + _search_test("sub1.example.com", "1") + _search_test("sub1.example.com", "1.") + + # Test case insensitive + _search_test("sub0.example.com", "SUB0") + + _search_test(None, "foo") + _search_test(None, "bar") + + def test_get_single_destination(self): + """ + Get one specific destinations. + """ + self._create_destinations(5) + + channel = self.make_request( + "GET", + self.url + "/sub0.example.com", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual("sub0.example.com", channel.json_body["destination"]) + + # Check that all fields are available + # convert channel.json_body into a List + self._check_fields([channel.json_body]) + + def _create_destinations(self, number_destinations: int): + """Create a number of destinations + + Args: + number_destinations: Number of destinations to be created + """ + for i in range(0, number_destinations): + dest = f"sub{i}.example.com" + self.get_success(self.store.set_destination_retry_timings(dest, 50, 50, 50)) + self.get_success( + self.store.set_destination_last_successful_stream_ordering(dest, 100) + ) + + def _check_fields(self, content: List[JsonDict]): + """Checks that the expected destination attributes are present in content + + Args: + content: List that is checked for content + """ + for c in content: + self.assertIn("destination", c) + self.assertIn("retry_last_ts", c) + self.assertIn("retry_interval", c) + self.assertIn("failure_ts", c) + self.assertIn("last_successful_stream_ordering", c) -- cgit 1.5.1 From 2f053f3f82ca174cc1c858c75afffae51af8ce0d Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 6 Dec 2021 19:11:43 +0000 Subject: Stabilise support for MSC2918 refresh tokens as they have now been merged into the Matrix specification. (#11435) --- changelog.d/11435.feature | 1 + docs/sample_config.yaml | 38 ++++++++++++++++++++++++++++++++++++++ synapse/config/registration.py | 38 ++++++++++++++++++++++++++++++++++++++ synapse/rest/client/login.py | 29 +++++++++++++---------------- synapse/rest/client/register.py | 23 ++++++++++------------- tests/rest/client/test_auth.py | 30 +++++++++++++++--------------- 6 files changed, 115 insertions(+), 44 deletions(-) create mode 100644 changelog.d/11435.feature (limited to 'docs') diff --git a/changelog.d/11435.feature b/changelog.d/11435.feature new file mode 100644 index 0000000000..9e127fae3c --- /dev/null +++ b/changelog.d/11435.feature @@ -0,0 +1 @@ +Stabilise support for [MSC2918](https://github.com/matrix-org/matrix-doc/blob/main/proposals/2918-refreshtokens.md#msc2918-refresh-tokens) refresh tokens as they have now been merged into the Matrix specification. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index ae476d19ac..6696ed5d1e 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1209,6 +1209,44 @@ oembed: # #session_lifetime: 24h +# Time that an access token remains valid for, if the session is +# using refresh tokens. +# For more information about refresh tokens, please see the manual. +# Note that this only applies to clients which advertise support for +# refresh tokens. +# +# Note also that this is calculated at login time and refresh time: +# changes are not applied to existing sessions until they are refreshed. +# +# By default, this is 5 minutes. +# +#refreshable_access_token_lifetime: 5m + +# Time that a refresh token remains valid for (provided that it is not +# exchanged for another one first). +# This option can be used to automatically log-out inactive sessions. +# Please see the manual for more information. +# +# Note also that this is calculated at login time and refresh time: +# changes are not applied to existing sessions until they are refreshed. +# +# By default, this is infinite. +# +#refresh_token_lifetime: 24h + +# Time that an access token remains valid for, if the session is NOT +# using refresh tokens. +# Please note that not all clients support refresh tokens, so setting +# this to a short value may be inconvenient for some users who will +# then be logged out frequently. +# +# Note also that this is calculated at login time: changes are not applied +# retrospectively to existing sessions for users that have already logged in. +# +# By default, this is infinite. +# +#nonrefreshable_access_token_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 68a4985398..7a059c6dec 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -220,6 +220,44 @@ class RegistrationConfig(Config): # #session_lifetime: 24h + # Time that an access token remains valid for, if the session is + # using refresh tokens. + # For more information about refresh tokens, please see the manual. + # Note that this only applies to clients which advertise support for + # refresh tokens. + # + # Note also that this is calculated at login time and refresh time: + # changes are not applied to existing sessions until they are refreshed. + # + # By default, this is 5 minutes. + # + #refreshable_access_token_lifetime: 5m + + # Time that a refresh token remains valid for (provided that it is not + # exchanged for another one first). + # This option can be used to automatically log-out inactive sessions. + # Please see the manual for more information. + # + # Note also that this is calculated at login time and refresh time: + # changes are not applied to existing sessions until they are refreshed. + # + # By default, this is infinite. + # + #refresh_token_lifetime: 24h + + # Time that an access token remains valid for, if the session is NOT + # using refresh tokens. + # Please note that not all clients support refresh tokens, so setting + # this to a short value may be inconvenient for some users who will + # then be logged out frequently. + # + # Note also that this is calculated at login time: changes are not applied + # retrospectively to existing sessions for users that have already logged in. + # + # By default, this is infinite. + # + #nonrefreshable_access_token_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 1b23fa18cf..f9994658c4 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -72,7 +72,7 @@ class LoginRestServlet(RestServlet): JWT_TYPE_DEPRECATED = "m.login.jwt" APPSERVICE_TYPE = "m.login.application_service" APPSERVICE_TYPE_UNSTABLE = "uk.half-shot.msc2778.login.application_service" - REFRESH_TOKEN_PARAM = "org.matrix.msc2918.refresh_token" + REFRESH_TOKEN_PARAM = "refresh_token" def __init__(self, hs: "HomeServer"): super().__init__() @@ -90,7 +90,7 @@ class LoginRestServlet(RestServlet): self.saml2_enabled = hs.config.saml2.saml2_enabled self.cas_enabled = hs.config.cas.cas_enabled self.oidc_enabled = hs.config.oidc.oidc_enabled - self._msc2918_enabled = ( + self._refresh_tokens_enabled = ( hs.config.registration.refreshable_access_token_lifetime is not None ) @@ -163,17 +163,16 @@ class LoginRestServlet(RestServlet): async def on_POST(self, request: SynapseRequest) -> Tuple[int, LoginResponse]: login_submission = parse_json_object_from_request(request) - if self._msc2918_enabled: - # Check if this login should also issue a refresh token, as per MSC2918 - should_issue_refresh_token = login_submission.get( - "org.matrix.msc2918.refresh_token", False - ) - if not isinstance(should_issue_refresh_token, bool): - raise SynapseError( - 400, "`org.matrix.msc2918.refresh_token` should be true or false." - ) - else: - should_issue_refresh_token = False + # Check to see if the client requested a refresh token. + client_requested_refresh_token = login_submission.get( + LoginRestServlet.REFRESH_TOKEN_PARAM, False + ) + if not isinstance(client_requested_refresh_token, bool): + raise SynapseError(400, "`refresh_token` should be true or false.") + + should_issue_refresh_token = ( + self._refresh_tokens_enabled and client_requested_refresh_token + ) try: if login_submission["type"] in ( @@ -463,9 +462,7 @@ def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict: class RefreshTokenServlet(RestServlet): - PATTERNS = client_patterns( - "/org.matrix.msc2918.refresh_token/refresh$", releases=(), unstable=True - ) + PATTERNS = (re.compile("^/_matrix/client/v1/refresh$"),) def __init__(self, hs: "HomeServer"): self._auth_handler = hs.get_auth_handler() diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 11fd6cd24d..8b56c76aed 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -419,7 +419,7 @@ class RegisterRestServlet(RestServlet): self.password_policy_handler = hs.get_password_policy_handler() self.clock = hs.get_clock() self._registration_enabled = self.hs.config.registration.enable_registration - self._msc2918_enabled = ( + self._refresh_tokens_enabled = ( hs.config.registration.refreshable_access_token_lifetime is not None ) @@ -445,18 +445,15 @@ class RegisterRestServlet(RestServlet): f"Do not understand membership kind: {kind}", ) - if self._msc2918_enabled: - # Check if this registration should also issue a refresh token, as - # per MSC2918 - should_issue_refresh_token = body.get( - "org.matrix.msc2918.refresh_token", False - ) - if not isinstance(should_issue_refresh_token, bool): - raise SynapseError( - 400, "`org.matrix.msc2918.refresh_token` should be true or false." - ) - else: - should_issue_refresh_token = False + # Check if the clients wishes for this registration to issue a refresh + # token. + client_requested_refresh_tokens = body.get("refresh_token", False) + if not isinstance(client_requested_refresh_tokens, bool): + raise SynapseError(400, "`refresh_token` should be true or false.") + + should_issue_refresh_token = ( + self._refresh_tokens_enabled and client_requested_refresh_tokens + ) # Pull out the provided username and do basic sanity checks early since # the auth layer will store these in sessions. diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 7239e1a1b5..aa8ad6d2e1 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -520,7 +520,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): """ return self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": refresh_token}, ) @@ -557,7 +557,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): login_with_refresh = self.make_request( "POST", "/_matrix/client/r0/login", - {"org.matrix.msc2918.refresh_token": True, **body}, + {"refresh_token": True, **body}, ) self.assertEqual(login_with_refresh.code, 200, login_with_refresh.result) self.assertIn("refresh_token", login_with_refresh.json_body) @@ -588,7 +588,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "username": "test3", "password": self.user_pass, "auth": {"type": LoginType.DUMMY}, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, }, ) self.assertEqual(register_with_refresh.code, 200, register_with_refresh.result) @@ -603,7 +603,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "type": "m.login.password", "user": "test", "password": self.user_pass, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, } login_response = self.make_request( "POST", @@ -614,7 +614,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": login_response.json_body["refresh_token"]}, ) self.assertEqual(refresh_response.code, 200, refresh_response.result) @@ -641,7 +641,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "type": "m.login.password", "user": "test", "password": self.user_pass, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, } login_response = self.make_request( "POST", @@ -655,7 +655,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": login_response.json_body["refresh_token"]}, ) self.assertEqual(refresh_response.code, 200, refresh_response.result) @@ -761,7 +761,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "type": "m.login.password", "user": "test", "password": self.user_pass, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, } login_response = self.make_request( "POST", @@ -811,7 +811,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "type": "m.login.password", "user": "test", "password": self.user_pass, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, } login_response = self.make_request( "POST", @@ -868,7 +868,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): "type": "m.login.password", "user": "test", "password": self.user_pass, - "org.matrix.msc2918.refresh_token": True, + "refresh_token": True, } login_response = self.make_request( "POST", @@ -880,7 +880,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): # This first refresh should work properly first_refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": login_response.json_body["refresh_token"]}, ) self.assertEqual( @@ -890,7 +890,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): # This one as well, since the token in the first one was never used second_refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": login_response.json_body["refresh_token"]}, ) self.assertEqual( @@ -900,7 +900,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): # This one should not, since the token from the first refresh is not valid anymore third_refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": first_refresh_response.json_body["refresh_token"]}, ) self.assertEqual( @@ -928,7 +928,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): # Now that the access token from the last valid refresh was used once, refreshing with the N-1 token should fail fourth_refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": login_response.json_body["refresh_token"]}, ) self.assertEqual( @@ -938,7 +938,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): # But refreshing from the last valid refresh token still works fifth_refresh_response = self.make_request( "POST", - "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + "/_matrix/client/v1/refresh", {"refresh_token": second_refresh_response.json_body["refresh_token"]}, ) self.assertEqual( -- cgit 1.5.1