From 26b960b08ba0110ef3246e5749bb75b9b04a231c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 6 Oct 2023 07:22:55 -0400 Subject: Register media servlets via regex. (#16419) This converts the media servlet URLs in the same way as (most) of the rest of Synapse. This will give more flexibility in the versions each endpoint exists under. --- synapse/http/server.py | 2 +- synapse/media/_base.py | 48 +++---------------------- synapse/media/media_repository.py | 10 +++++- synapse/rest/media/config_resource.py | 13 ++++--- synapse/rest/media/download_resource.py | 40 +++++++++++++-------- synapse/rest/media/media_repository_resource.py | 33 +++++++++++------ synapse/rest/media/preview_url_resource.py | 26 +++++--------- synapse/rest/media/thumbnail_resource.py | 35 ++++++++++-------- synapse/rest/media/upload_resource.py | 14 ++++---- 9 files changed, 103 insertions(+), 118 deletions(-) (limited to 'synapse') diff --git a/synapse/http/server.py b/synapse/http/server.py index 3bbf91298e..1e4e56f36b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -266,7 +266,7 @@ class HttpServer(Protocol): def register_paths( self, method: str, - path_patterns: Iterable[Pattern], + path_patterns: Iterable[Pattern[str]], callback: ServletCallback, servlet_classname: str, ) -> None: diff --git a/synapse/media/_base.py b/synapse/media/_base.py index 80c448de2b..d103b43449 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -26,11 +26,11 @@ from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender from twisted.web.server import Request -from synapse.api.errors import Codes, SynapseError, cs_error +from synapse.api.errors import Codes, cs_error from synapse.http.server import finish_request, respond_with_json from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable -from synapse.util.stringutils import is_ascii, parse_and_validate_server_name +from synapse.util.stringutils import is_ascii logger = logging.getLogger(__name__) @@ -84,52 +84,12 @@ INLINE_CONTENT_TYPES = [ ] -def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: - """Parses the server name, media ID and optional file name from the request URI - - Also performs some rough validation on the server name. - - Args: - request: The `Request`. - - Returns: - A tuple containing the parsed server name, media ID and optional file name. - - Raises: - SynapseError(404): if parsing or validation fail for any reason - """ - try: - # The type on postpath seems incorrect in Twisted 21.2.0. - postpath: List[bytes] = request.postpath # type: ignore - assert postpath - - # This allows users to append e.g. /test.png to the URL. Useful for - # clients that parse the URL to see content type. - server_name_bytes, media_id_bytes = postpath[:2] - server_name = server_name_bytes.decode("utf-8") - media_id = media_id_bytes.decode("utf8") - - # Validate the server name, raising if invalid - parse_and_validate_server_name(server_name) - - file_name = None - if len(postpath) > 2: - try: - file_name = urllib.parse.unquote(postpath[-1].decode("utf-8")) - except UnicodeDecodeError: - pass - return server_name, media_id, file_name - except Exception: - raise SynapseError( - 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN - ) - - def respond_404(request: SynapseRequest) -> None: + assert request.path is not None respond_with_json( request, 404, - cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + cs_error("Not found '%s'" % (request.path.decode(),), code=Codes.NOT_FOUND), send_cors=True, ) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 1b7b014f9a..d11c2ff4ee 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -48,6 +48,7 @@ from synapse.media.filepath import MediaFilePaths from synapse.media.media_storage import MediaStorage from synapse.media.storage_provider import StorageProviderWrapper from synapse.media.thumbnailer import Thumbnailer, ThumbnailError +from synapse.media.url_previewer import UrlPreviewer from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID from synapse.util.async_helpers import Linearizer @@ -114,7 +115,7 @@ class MediaRepository: ) storage_providers.append(provider) - self.media_storage = MediaStorage( + self.media_storage: MediaStorage = MediaStorage( self.hs, self.primary_base_path, self.filepaths, storage_providers ) @@ -142,6 +143,13 @@ class MediaRepository: MEDIA_RETENTION_CHECK_PERIOD_MS, ) + if hs.config.media.url_preview_enabled: + self.url_previewer: Optional[UrlPreviewer] = UrlPreviewer( + hs, self, self.media_storage + ) + else: + self.url_previewer = None + def _start_update_recently_accessed(self) -> Deferred: return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed diff --git a/synapse/rest/media/config_resource.py b/synapse/rest/media/config_resource.py index a95804d327..dbf5133c72 100644 --- a/synapse/rest/media/config_resource.py +++ b/synapse/rest/media/config_resource.py @@ -14,17 +14,19 @@ # limitations under the License. # +import re from typing import TYPE_CHECKING -from synapse.http.server import DirectServeJsonResource, respond_with_json +from synapse.http.server import respond_with_json +from synapse.http.servlet import RestServlet from synapse.http.site import SynapseRequest if TYPE_CHECKING: from synapse.server import HomeServer -class MediaConfigResource(DirectServeJsonResource): - isLeaf = True +class MediaConfigResource(RestServlet): + PATTERNS = [re.compile("/_matrix/media/(r0|v3|v1)/config$")] def __init__(self, hs: "HomeServer"): super().__init__() @@ -33,9 +35,6 @@ class MediaConfigResource(DirectServeJsonResource): self.auth = hs.get_auth() self.limits_dict = {"m.upload.size": config.media.max_upload_size} - async def _async_render_GET(self, request: SynapseRequest) -> None: + async def on_GET(self, request: SynapseRequest) -> None: await self.auth.get_user_by_req(request) respond_with_json(request, 200, self.limits_dict, send_cors=True) - - async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: - respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/download_resource.py b/synapse/rest/media/download_resource.py index 3c618ef60a..65b9ff52fa 100644 --- a/synapse/rest/media/download_resource.py +++ b/synapse/rest/media/download_resource.py @@ -13,16 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING +import re +from typing import TYPE_CHECKING, Optional -from synapse.http.server import ( - DirectServeJsonResource, - set_corp_headers, - set_cors_headers, -) -from synapse.http.servlet import parse_boolean +from synapse.http.server import set_corp_headers, set_cors_headers +from synapse.http.servlet import RestServlet, parse_boolean from synapse.http.site import SynapseRequest -from synapse.media._base import parse_media_id, respond_404 +from synapse.media._base import respond_404 +from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: from synapse.media.media_repository import MediaRepository @@ -31,15 +29,28 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class DownloadResource(DirectServeJsonResource): - isLeaf = True +class DownloadResource(RestServlet): + PATTERNS = [ + re.compile( + "/_matrix/media/(r0|v3|v1)/download/(?P[^/]*)/(?P[^/]*)(/(?P[^/]*))?$" + ) + ] def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): super().__init__() self.media_repo = media_repo self._is_mine_server_name = hs.is_mine_server_name - async def _async_render_GET(self, request: SynapseRequest) -> None: + async def on_GET( + self, + request: SynapseRequest, + server_name: str, + media_id: str, + file_name: Optional[str] = None, + ) -> None: + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + set_cors_headers(request) set_corp_headers(request) request.setHeader( @@ -58,9 +69,8 @@ class DownloadResource(DirectServeJsonResource): b"Referrer-Policy", b"no-referrer", ) - server_name, media_id, name = parse_media_id(request) if self._is_mine_server_name(server_name): - await self.media_repo.get_local_media(request, media_id, name) + await self.media_repo.get_local_media(request, media_id, file_name) else: allow_remote = parse_boolean(request, "allow_remote", default=True) if not allow_remote: @@ -72,4 +82,6 @@ class DownloadResource(DirectServeJsonResource): respond_404(request) return - await self.media_repo.get_remote_media(request, server_name, media_id, name) + await self.media_repo.get_remote_media( + request, server_name, media_id, file_name + ) diff --git a/synapse/rest/media/media_repository_resource.py b/synapse/rest/media/media_repository_resource.py index 5ebaa3b032..2089bb1029 100644 --- a/synapse/rest/media/media_repository_resource.py +++ b/synapse/rest/media/media_repository_resource.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING from synapse.config._base import ConfigError -from synapse.http.server import UnrecognizedRequestResource +from synapse.http.server import HttpServer, JsonResource from .config_resource import MediaConfigResource from .download_resource import DownloadResource @@ -27,7 +27,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer -class MediaRepositoryResource(UnrecognizedRequestResource): +class MediaRepositoryResource(JsonResource): """File uploading and downloading. Uploads are POSTed to a resource which returns a token which is used to GET @@ -70,6 +70,11 @@ class MediaRepositoryResource(UnrecognizedRequestResource): width and height are close to the requested size and the aspect matches the requested size. The client should scale the image if it needs to fit within a given rectangle. + + This gets mounted at various points under /_matrix/media, including: + * /_matrix/media/r0 + * /_matrix/media/v1 + * /_matrix/media/v3 """ def __init__(self, hs: "HomeServer"): @@ -77,17 +82,23 @@ class MediaRepositoryResource(UnrecognizedRequestResource): if not hs.config.media.can_load_media_repo: raise ConfigError("Synapse is not configured to use a media repo.") - super().__init__() + JsonResource.__init__(self, hs, canonical_json=False) + self.register_servlets(self, hs) + + @staticmethod + def register_servlets(http_server: HttpServer, hs: "HomeServer") -> None: media_repo = hs.get_media_repository() - self.putChild(b"upload", UploadResource(hs, media_repo)) - self.putChild(b"download", DownloadResource(hs, media_repo)) - self.putChild( - b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) + # Note that many of these should not exist as v1 endpoints, but empirically + # a lot of traffic still goes to them. + + UploadResource(hs, media_repo).register(http_server) + DownloadResource(hs, media_repo).register(http_server) + ThumbnailResource(hs, media_repo, media_repo.media_storage).register( + http_server ) if hs.config.media.url_preview_enabled: - self.putChild( - b"preview_url", - PreviewUrlResource(hs, media_repo, media_repo.media_storage), + PreviewUrlResource(hs, media_repo, media_repo.media_storage).register( + http_server ) - self.putChild(b"config", MediaConfigResource(hs)) + MediaConfigResource(hs).register(http_server) diff --git a/synapse/rest/media/preview_url_resource.py b/synapse/rest/media/preview_url_resource.py index 58513c4be4..c8acb65dca 100644 --- a/synapse/rest/media/preview_url_resource.py +++ b/synapse/rest/media/preview_url_resource.py @@ -13,24 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from typing import TYPE_CHECKING -from synapse.http.server import ( - DirectServeJsonResource, - respond_with_json, - respond_with_json_bytes, -) -from synapse.http.servlet import parse_integer, parse_string +from synapse.http.server import respond_with_json_bytes +from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.media.media_storage import MediaStorage -from synapse.media.url_previewer import UrlPreviewer if TYPE_CHECKING: from synapse.media.media_repository import MediaRepository from synapse.server import HomeServer -class PreviewUrlResource(DirectServeJsonResource): +class PreviewUrlResource(RestServlet): """ The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix @@ -48,7 +44,7 @@ class PreviewUrlResource(DirectServeJsonResource): * Matrix cannot be used to distribute the metadata between homeservers. """ - isLeaf = True + PATTERNS = [re.compile("/_matrix/media/(r0|v3|v1)/preview_url$")] def __init__( self, @@ -62,14 +58,10 @@ class PreviewUrlResource(DirectServeJsonResource): self.clock = hs.get_clock() self.media_repo = media_repo self.media_storage = media_storage + assert self.media_repo.url_previewer is not None + self.url_previewer = self.media_repo.url_previewer - self._url_previewer = UrlPreviewer(hs, media_repo, media_storage) - - async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: - request.setHeader(b"Allow", b"OPTIONS, GET") - respond_with_json(request, 200, {}, send_cors=True) - - async def _async_render_GET(self, request: SynapseRequest) -> None: + async def on_GET(self, request: SynapseRequest) -> None: # XXX: if get_user_by_req fails, what should we do in an async render? requester = await self.auth.get_user_by_req(request) url = parse_string(request, "url", required=True) @@ -77,5 +69,5 @@ class PreviewUrlResource(DirectServeJsonResource): if ts is None: ts = self.clock.time_msec() - og = await self._url_previewer.preview(url, requester.user, ts) + og = await self.url_previewer.preview(url, requester.user, ts) respond_with_json_bytes(request, 200, og, send_cors=True) diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py index 661e604b85..f9cd773f77 100644 --- a/synapse/rest/media/thumbnail_resource.py +++ b/synapse/rest/media/thumbnail_resource.py @@ -13,29 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. - import logging +import re from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from synapse.api.errors import Codes, SynapseError, cs_error from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP -from synapse.http.server import ( - DirectServeJsonResource, - respond_with_json, - set_corp_headers, - set_cors_headers, -) -from synapse.http.servlet import parse_integer, parse_string +from synapse.http.server import respond_with_json, set_corp_headers, set_cors_headers +from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.media._base import ( FileInfo, ThumbnailInfo, - parse_media_id, respond_404, respond_with_file, respond_with_responder, ) from synapse.media.media_storage import MediaStorage +from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: from synapse.media.media_repository import MediaRepository @@ -44,8 +39,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class ThumbnailResource(DirectServeJsonResource): - isLeaf = True +class ThumbnailResource(RestServlet): + PATTERNS = [ + re.compile( + "/_matrix/media/(r0|v3|v1)/thumbnail/(?P[^/]*)/(?P[^/]*)$" + ) + ] def __init__( self, @@ -60,12 +59,17 @@ class ThumbnailResource(DirectServeJsonResource): self.media_storage = media_storage self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails self._is_mine_server_name = hs.is_mine_server_name + self._server_name = hs.hostname self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from - async def _async_render_GET(self, request: SynapseRequest) -> None: + async def on_GET( + self, request: SynapseRequest, server_name: str, media_id: str + ) -> None: + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + set_cors_headers(request) set_corp_headers(request) - server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width", required=True) height = parse_integer(request, "height", required=True) method = parse_string(request, "method", "scale") @@ -418,13 +422,14 @@ class ThumbnailResource(DirectServeJsonResource): # `dynamic_thumbnails` is disabled. logger.info("Failed to find any generated thumbnails") + assert request.path is not None respond_with_json( request, 400, cs_error( - "Cannot find any thumbnails for the requested media (%r). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)" + "Cannot find any thumbnails for the requested media ('%s'). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)" % ( - request.postpath, + request.path.decode(), ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()), ), code=Codes.UNKNOWN, diff --git a/synapse/rest/media/upload_resource.py b/synapse/rest/media/upload_resource.py index 043e8d6077..949326d85d 100644 --- a/synapse/rest/media/upload_resource.py +++ b/synapse/rest/media/upload_resource.py @@ -14,11 +14,12 @@ # limitations under the License. import logging +import re from typing import IO, TYPE_CHECKING, Dict, List, Optional from synapse.api.errors import Codes, SynapseError -from synapse.http.server import DirectServeJsonResource, respond_with_json -from synapse.http.servlet import parse_bytes_from_args +from synapse.http.server import respond_with_json +from synapse.http.servlet import RestServlet, parse_bytes_from_args from synapse.http.site import SynapseRequest from synapse.media.media_storage import SpamMediaException @@ -29,8 +30,8 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class UploadResource(DirectServeJsonResource): - isLeaf = True +class UploadResource(RestServlet): + PATTERNS = [re.compile("/_matrix/media/(r0|v3|v1)/upload")] def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): super().__init__() @@ -43,10 +44,7 @@ class UploadResource(DirectServeJsonResource): self.max_upload_size = hs.config.media.max_upload_size self.clock = hs.get_clock() - async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: - respond_with_json(request, 200, {}, send_cors=True) - - async def _async_render_POST(self, request: SynapseRequest) -> None: + async def on_POST(self, request: SynapseRequest) -> None: requester = await self.auth.get_user_by_req(request) raw_content_length = request.getHeader("Content-Length") if raw_content_length is None: -- cgit 1.5.1