summary refs log tree commit diff
path: root/synapse/storage/background_updates.py
diff options
context:
space:
mode:
author David Robertson <davidr@element.io> 2023-06-07 18:12:23 +0100
committer GitHub <noreply@github.com> 2023-06-07 17:12:23 +0000
commit d162aecaac52fb467822e319e4c3c5b216c33ca9 (patch)
tree 8a92ee76319fb5ed9f0c26de7df0067758b46296 /synapse/storage/background_updates.py
parent Remove superfluous `room_memberships` join from background update (#15733) (diff)
download synapse-d162aecaac52fb467822e319e4c3c5b216c33ca9.tar.xz
Quick & dirty metric for background update status (#15740)
* Quick & dirty metric for background update status

* Changelog

* Remove debug

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>

* Actually write to _aborted

---------

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
Diffstat (limited to 'synapse/storage/background_updates.py')
-rw-r--r-- synapse/storage/background_updates.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index ca085ef800..edc97a9d61 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from enum import IntEnum
 from types import TracebackType
 from typing import (
     TYPE_CHECKING,
@@ -136,6 +137,15 @@ class BackgroundUpdatePerformance:
             return float(self.total_item_count) / float(self.total_duration_ms)
 
 
+class UpdaterStatus(IntEnum):
+    # Use negative values for error conditions.
+    ABORTED = -1
+    DISABLED = 0
+    NOT_STARTED = 1
+    RUNNING_UPDATE = 2
+    COMPLETE = 3
+
+
 class BackgroundUpdater:
     """Background updates are updates to the database that run in the
     background. Each update processes a batch of data at once. We attempt to
@@ -158,11 +168,16 @@ class BackgroundUpdater:
 
         self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
         self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
+        # TODO: all these bool flags make me feel icky---can we combine into a status
+        # enum?
         self._all_done = False
 
         # Whether we're currently running updates
         self._running = False
 
+        # Marker to be set if we abort and halt all background updates.
+        self._aborted = False
+
         # Whether background updates are enabled. This allows us to
         # enable/disable background updates via the admin API.
         self.enabled = True
@@ -175,6 +190,20 @@ class BackgroundUpdater:
         self.sleep_duration_ms = hs.config.background_updates.sleep_duration_ms
         self.sleep_enabled = hs.config.background_updates.sleep_enabled
 
+    def get_status(self) -> UpdaterStatus:
+        """An integer summarising the updater status. Used as a metric."""
+        if self._aborted:
+            return UpdaterStatus.ABORTED
+        # TODO: a status for "have seen at least one failure, but haven't aborted yet".
+        if not self.enabled:
+            return UpdaterStatus.DISABLED
+
+        if self._all_done:
+            return UpdaterStatus.COMPLETE
+        if self._running:
+            return UpdaterStatus.RUNNING_UPDATE
+        return UpdaterStatus.NOT_STARTED
+
     def register_update_controller_callbacks(
         self,
         on_update: ON_UPDATE_CALLBACK,
@@ -296,6 +325,7 @@ class BackgroundUpdater:
                 except Exception:
                     back_to_back_failures += 1
                     if back_to_back_failures >= 5:
+                        self._aborted = True
                         raise RuntimeError(
                             "5 back-to-back background update failures; aborting."
                         )