diff --git a/changelog.d/8887.feature b/changelog.d/8887.feature
new file mode 100644
index 0000000000..729eb1f1ea
--- /dev/null
+++ b/changelog.d/8887.feature
@@ -0,0 +1 @@
+Add `X-Robots-Tag` header to stop web crawlers from indexing media.
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 67aa993f19..47c2b44bff 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -155,6 +155,11 @@ def add_file_headers(request, media_type, file_size, upload_name):
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
request.setHeader(b"Content-Length", b"%d" % (file_size,))
+    # Tell web crawlers to not index, archive, or follow links in media. This
+    # should help to prevent things in the media repo from showing up in web
+    # search results. The value is passed as bytes, like the headers set above.
+    request.setHeader(b"X-Robots-Tag", b"noindex, nofollow, noarchive, noimageindex")
+
# separators as defined in RFC2616. SP and HT are handled separately.
# see _can_encode_filename_as_token.
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 4c749f1a61..6f0677d335 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -362,3 +362,16 @@ class MediaRepoTests(unittest.HomeserverTestCase):
"error": "Not found [b'example.com', b'12345']",
},
)
+
+    def test_x_robots_tag_header(self):
+        """
+        Media responses must carry the `X-Robots-Tag` header, which instructs web
+        crawlers not to index, archive, or follow links in media.
+        """
+        disposition = b"inline; filename=out" + self.test_image.extension
+        response_headers = self._req(disposition).headers
+
+        self.assertEqual(
+            response_headers.getRawHeaders(b"X-Robots-Tag"),
+            [b"noindex, nofollow, noarchive, noimageindex"],
+        )
|