summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/7706.feature1
-rw-r--r--docs/workers.md7
-rw-r--r--synapse/config/repository.py6
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py18
4 files changed, 28 insertions, 4 deletions
diff --git a/changelog.d/7706.feature b/changelog.d/7706.feature
new file mode 100644
index 0000000000..c6b3b20b55
--- /dev/null
+++ b/changelog.d/7706.feature
@@ -0,0 +1 @@
+Add support for running multiple media repository workers. See [docs/workers.md](docs/workers.md) for instructions.
diff --git a/docs/workers.md b/docs/workers.md
index 7512eff43a..f4cbbc0400 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -307,7 +307,12 @@ expose the `media` resource. For example:
            - media
 ```
 
-Note this worker cannot be load-balanced: only one instance should be active.
+Note that if running multiple media repositories they must be on the same server
+and you must configure a single instance to run the background tasks, e.g.:
+
+```yaml
+    media_instance_running_background_jobs: "media-repository-1"
+```
 
 ### `synapse.app.client_reader`
 
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index b751d02d37..01009f3924 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -94,6 +94,12 @@ class ContentRepositoryConfig(Config):
         else:
             self.can_load_media_repo = True
 
+        # Whether this instance should be the one to run the background jobs to
+        # e.g clean up old URL previews.
+        self.media_instance_running_background_jobs = config.get(
+            "media_instance_running_background_jobs",
+        )
+
         self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M"))
         self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
         self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index f67e0fb3ec..b4645cd608 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -82,6 +82,15 @@ class PreviewUrlResource(DirectServeResource):
         self.primary_base_path = media_repo.primary_base_path
         self.media_storage = media_storage
 
+        # We run the background jobs if we're the instance specified (or no
+        # instance is specified, where we assume there is only one instance
+        # serving media).
+        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
+        self._worker_run_media_background_jobs = (
+            instance_running_jobs is None
+            or instance_running_jobs == hs.get_instance_name()
+        )
+
         self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
         self.url_preview_accept_language = hs.config.url_preview_accept_language
 
@@ -94,9 +103,10 @@ class PreviewUrlResource(DirectServeResource):
             expiry_ms=60 * 60 * 1000,
         )
 
-        self._cleaner_loop = self.clock.looping_call(
-            self._start_expire_url_cache_data, 10 * 1000
-        )
+        if self._worker_run_media_background_jobs:
+            self._cleaner_loop = self.clock.looping_call(
+                self._start_expire_url_cache_data, 10 * 1000
+            )
 
     def render_OPTIONS(self, request):
         request.setHeader(b"Allow", b"OPTIONS, GET")
@@ -397,6 +407,8 @@ class PreviewUrlResource(DirectServeResource):
         """
         # TODO: Delete from backup media store
 
+        assert self._worker_run_media_background_jobs
+
         now = self.clock.time_msec()
 
         logger.debug("Running url preview cache expiry")