diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index bc8364400d..d64910aded 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -12,12 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
-from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Iterable, Optional
+from typing import (
+ TYPE_CHECKING,
+ AsyncContextManager,
+ Awaitable,
+ Callable,
+ Dict,
+ Iterable,
+ Optional,
+)
+
+import attr
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage.types import Connection
from synapse.types import JsonDict
-from synapse.util import json_encoder
+from synapse.util import Clock, json_encoder
from . import engines
@@ -28,6 +38,45 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
+ON_UPDATE_CALLBACK = Callable[[str, str, bool], AsyncContextManager[int]]
+DEFAULT_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
+MIN_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class _BackgroundUpdateHandler:
+ """A handler for a given background update.
+
+ Attributes:
+ callback: The function to call to make progress on the background
+ update.
+ oneshot: Wether the update is likely to happen all in one go, ignoring
+ the supplied target duration, e.g. index creation. This is used by
+ the update controller to help correctly schedule the update.
+ """
+
+ callback: Callable[[JsonDict, int], Awaitable[int]]
+ oneshot: bool = False
+
+
+class _BackgroundUpdateContextManager:
+ BACKGROUND_UPDATE_INTERVAL_MS = 1000
+ BACKGROUND_UPDATE_DURATION_MS = 100
+
+ def __init__(self, sleep: bool, clock: Clock):
+ self._sleep = sleep
+ self._clock = clock
+
+ async def __aenter__(self) -> int:
+ if self._sleep:
+ await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000)
+
+ return self.BACKGROUND_UPDATE_DURATION_MS
+
+ async def __aexit__(self, *exc) -> None:
+ pass
+
+
class BackgroundUpdatePerformance:
"""Tracks the how long a background update is taking to update its items"""
@@ -84,20 +133,22 @@ class BackgroundUpdater:
MINIMUM_BACKGROUND_BATCH_SIZE = 1
DEFAULT_BACKGROUND_BATCH_SIZE = 100
- BACKGROUND_UPDATE_INTERVAL_MS = 1000
- BACKGROUND_UPDATE_DURATION_MS = 100
def __init__(self, hs: "HomeServer", database: "DatabasePool"):
self._clock = hs.get_clock()
self.db_pool = database
+ self._database_name = database.name()
+
# if a background update is currently running, its name.
self._current_background_update: Optional[str] = None
+ self._on_update_callback: Optional[ON_UPDATE_CALLBACK] = None
+ self._default_batch_size_callback: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None
+ self._min_batch_size_callback: Optional[MIN_BATCH_SIZE_CALLBACK] = None
+
self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
- self._background_update_handlers: Dict[
- str, Callable[[JsonDict, int], Awaitable[int]]
- ] = {}
+ self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
self._all_done = False
# Whether we're currently running updates
@@ -107,6 +158,83 @@ class BackgroundUpdater:
# enable/disable background updates via the admin API.
self.enabled = True
+ def register_update_controller_callbacks(
+ self,
+ on_update: ON_UPDATE_CALLBACK,
+ default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None,
+ min_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None,
+ ) -> None:
+ """Register callbacks from a module for each hook."""
+ if self._on_update_callback is not None:
+ logger.warning(
+ "More than one module tried to register callbacks for controlling"
+ " background updates. Only the callbacks registered by the first module"
+ " (in order of appearance in Synapse's configuration file) that tried to"
+ " do so will be called."
+ )
+
+ return
+
+ self._on_update_callback = on_update
+
+ if default_batch_size is not None:
+ self._default_batch_size_callback = default_batch_size
+
+ if min_batch_size is not None:
+ self._min_batch_size_callback = min_batch_size
+
+ def _get_context_manager_for_update(
+ self,
+ sleep: bool,
+ update_name: str,
+ database_name: str,
+ oneshot: bool,
+ ) -> AsyncContextManager[int]:
+ """Get a context manager to run a background update with.
+
+ If a module has registered a `update_handler` callback, use the context manager
+ it returns.
+
+ Otherwise, returns a context manager that will return a default value, optionally
+ sleeping if needed.
+
+ Args:
+ sleep: Whether we can sleep between updates.
+ update_name: The name of the update.
+ database_name: The name of the database the update is being run on.
+ oneshot: Whether the update will complete all in one go, e.g. index creation.
+ In such cases the returned target duration is ignored.
+
+ Returns:
+ The target duration in milliseconds that the background update should run for.
+
+ Note: this is a *target*, and an iteration may take substantially longer or
+ shorter.
+ """
+ if self._on_update_callback is not None:
+ return self._on_update_callback(update_name, database_name, oneshot)
+
+ return _BackgroundUpdateContextManager(sleep, self._clock)
+
+ async def _default_batch_size(self, update_name: str, database_name: str) -> int:
+ """The batch size to use for the first iteration of a new background
+ update.
+ """
+ if self._default_batch_size_callback is not None:
+ return await self._default_batch_size_callback(update_name, database_name)
+
+ return self.DEFAULT_BACKGROUND_BATCH_SIZE
+
+ async def _min_batch_size(self, update_name: str, database_name: str) -> int:
+ """A lower bound on the batch size of a new background update.
+
+ Used to ensure that progress is always made. Must be greater than 0.
+ """
+ if self._min_batch_size_callback is not None:
+ return await self._min_batch_size_callback(update_name, database_name)
+
+ return self.MINIMUM_BACKGROUND_BATCH_SIZE
+
def get_current_update(self) -> Optional[BackgroundUpdatePerformance]:
"""Returns the current background update, if any."""
@@ -135,13 +263,8 @@ class BackgroundUpdater:
try:
logger.info("Starting background schema updates")
while self.enabled:
- if sleep:
- await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0)
-
try:
- result = await self.do_next_background_update(
- self.BACKGROUND_UPDATE_DURATION_MS
- )
+ result = await self.do_next_background_update(sleep)
except Exception:
logger.exception("Error doing update")
else:
@@ -203,13 +326,15 @@ class BackgroundUpdater:
return not update_exists
- async def do_next_background_update(self, desired_duration_ms: float) -> bool:
+ async def do_next_background_update(self, sleep: bool = True) -> bool:
"""Does some amount of work on the next queued background update
Returns once some amount of work is done.
Args:
- desired_duration_ms: How long we want to spend updating.
+ sleep: Whether to limit how quickly we run background updates or
+ not.
+
Returns:
True if we have finished running all the background updates, otherwise False
"""
@@ -252,7 +377,19 @@ class BackgroundUpdater:
self._current_background_update = upd["update_name"]
- await self._do_background_update(desired_duration_ms)
+ # We have a background update to run, otherwise we would have returned
+ # early.
+ assert self._current_background_update is not None
+ update_info = self._background_update_handlers[self._current_background_update]
+
+ async with self._get_context_manager_for_update(
+ sleep=sleep,
+ update_name=self._current_background_update,
+ database_name=self._database_name,
+ oneshot=update_info.oneshot,
+ ) as desired_duration_ms:
+ await self._do_background_update(desired_duration_ms)
+
return False
async def _do_background_update(self, desired_duration_ms: float) -> int:
@@ -260,7 +397,7 @@ class BackgroundUpdater:
update_name = self._current_background_update
logger.info("Starting update batch on background update '%s'", update_name)
- update_handler = self._background_update_handlers[update_name]
+ update_handler = self._background_update_handlers[update_name].callback
performance = self._background_update_performance.get(update_name)
@@ -273,9 +410,14 @@ class BackgroundUpdater:
if items_per_ms is not None:
batch_size = int(desired_duration_ms * items_per_ms)
# Clamp the batch size so that we always make progress
- batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE)
+ batch_size = max(
+ batch_size,
+ await self._min_batch_size(update_name, self._database_name),
+ )
else:
- batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE
+ batch_size = await self._default_batch_size(
+ update_name, self._database_name
+ )
progress_json = await self.db_pool.simple_select_one_onecol(
"background_updates",
@@ -294,6 +436,8 @@ class BackgroundUpdater:
duration_ms = time_stop - time_start
+ performance.update(items_updated, duration_ms)
+
logger.info(
"Running background update %r. Processed %r items in %rms."
" (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)",
@@ -306,8 +450,6 @@ class BackgroundUpdater:
batch_size,
)
- performance.update(items_updated, duration_ms)
-
return len(self._background_update_performance)
def register_background_update_handler(
@@ -331,7 +473,9 @@ class BackgroundUpdater:
update_name: The name of the update that this code handles.
update_handler: The function that does the update.
"""
- self._background_update_handlers[update_name] = update_handler
+ self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+ update_handler
+ )
def register_noop_background_update(self, update_name: str) -> None:
"""Register a noop handler for a background update.
@@ -453,7 +597,9 @@ class BackgroundUpdater:
await self._end_background_update(update_name)
return 1
- self.register_background_update_handler(update_name, updater)
+ self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+ updater, oneshot=True
+ )
async def _end_background_update(self, update_name: str) -> None:
"""Removes a completed background update task from the queue.
|