From cd9e72b185e36ac3f18ab1fe567fdeee87bfcb8e Mon Sep 17 00:00:00 2001
From: Aaron Raimist
Date: Tue, 8 Dec 2020 16:51:03 -0600
Subject: Add X-Robots-Tag header to stop crawlers from indexing media (#8887)

Fixes / related to: https://github.com/matrix-org/synapse/issues/6533

This should do essentially the same thing as a robots.txt file telling
robots to not index the media repo.

https://developers.google.com/search/reference/robots_meta_tag

Signed-off-by: Aaron Raimist
---
 tests/rest/media/v1/test_media_storage.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tests')

diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 4c749f1a61..6f0677d335 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -362,3 +362,16 @@ class MediaRepoTests(unittest.HomeserverTestCase):
                 "error": "Not found [b'example.com', b'12345']",
             },
         )
+
+    def test_x_robots_tag_header(self):
+        """
+        Tests that the `X-Robots-Tag` header is present, which informs web crawlers
+        to not index, archive, or follow links in media.
+        """
+        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"X-Robots-Tag"),
+            [b"noindex, nofollow, noarchive, noimageindex"],
+        )
--
cgit 1.4.1
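
The diff above is limited to the tests; the server-side change that actually sets the header is not shown in this view. As a rough illustration of what the test exercises, here is a minimal Twisted sketch (Synapse is built on Twisted) of a media-style resource that attaches the same X-Robots-Tag value the test asserts. The resource name, port, and response body are hypothetical assumptions for the sketch, not the actual Synapse implementation.

# Hypothetical sketch, not the Synapse implementation: a Twisted resource that
# serves a media-like response and sets the X-Robots-Tag header asserted in
# test_x_robots_tag_header, telling crawlers not to index, follow, archive,
# or image-index the content.
from twisted.internet import reactor
from twisted.web import resource, server


class FakeMediaResource(resource.Resource):
    isLeaf = True

    def render_GET(self, request):
        # Header value mirrors the one the test expects.
        request.setHeader(
            b"X-Robots-Tag", b"noindex, nofollow, noarchive, noimageindex"
        )
        request.setHeader(b"Content-Type", b"image/png")
        request.setHeader(b"Content-Disposition", b"inline; filename=out.png")
        return b"\x89PNG..."  # placeholder bytes standing in for real media


if __name__ == "__main__":
    # Serve on an arbitrary local port for manual inspection, e.g.:
    #   curl -sI http://localhost:8080/ | grep -i x-robots-tag
    reactor.listenTCP(8080, server.Site(FakeMediaResource()))
    reactor.run()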