Diffstat (limited to 'synapse/media')
-rw-r--r--   synapse/media/_base.py             | 101
-rw-r--r--   synapse/media/media_repository.py  |  21
-rw-r--r--   synapse/media/media_storage.py     |  77
-rw-r--r--   synapse/media/oembed.py            |  34
-rw-r--r--   synapse/media/preview_html.py      |  79
-rw-r--r--   synapse/media/storage_provider.py  |  20
-rw-r--r--   synapse/media/thumbnailer.py       |   9
-rw-r--r--   synapse/media/url_previewer.py     |   4
8 files changed, 221 insertions, 124 deletions
diff --git a/synapse/media/_base.py b/synapse/media/_base.py
index ef8334ae25..13345acf75 100644
--- a/synapse/media/_base.py
+++ b/synapse/media/_base.py
@@ -26,11 +26,11 @@ from twisted.internet.interfaces import IConsumer
 from twisted.protocols.basic import FileSender
 from twisted.web.server import Request
 
-from synapse.api.errors import Codes, SynapseError, cs_error
+from synapse.api.errors import Codes, cs_error
 from synapse.http.server import finish_request, respond_with_json
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
-from synapse.util.stringutils import is_ascii, parse_and_validate_server_name
+from synapse.util.stringutils import is_ascii
 
 logger = logging.getLogger(__name__)
 
@@ -50,53 +50,46 @@ TEXT_CONTENT_TYPES = [
     "text/xml",
 ]
 
-
-def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
-    """Parses the server name, media ID and optional file name from the request URI
-
-    Also performs some rough validation on the server name.
-
-    Args:
-        request: The `Request`.
-
-    Returns:
-        A tuple containing the parsed server name, media ID and optional file name.
-
-    Raises:
-        SynapseError(404): if parsing or validation fail for any reason
-    """
-    try:
-        # The type on postpath seems incorrect in Twisted 21.2.0.
-        postpath: List[bytes] = request.postpath  # type: ignore
-        assert postpath
-
-        # This allows users to append e.g. /test.png to the URL. Useful for
-        # clients that parse the URL to see content type.
-        server_name_bytes, media_id_bytes = postpath[:2]
-        server_name = server_name_bytes.decode("utf-8")
-        media_id = media_id_bytes.decode("utf8")
-
-        # Validate the server name, raising if invalid
-        parse_and_validate_server_name(server_name)
-
-        file_name = None
-        if len(postpath) > 2:
-            try:
-                file_name = urllib.parse.unquote(postpath[-1].decode("utf-8"))
-            except UnicodeDecodeError:
-                pass
-        return server_name, media_id, file_name
-    except Exception:
-        raise SynapseError(
-            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
-        )
+# A list of all content types that are "safe" to be rendered inline in a browser.
+INLINE_CONTENT_TYPES = [
+    "text/css",
+    "text/plain",
+    "text/csv",
+    "application/json",
+    "application/ld+json",
+    # We allow some media files deemed as safe, which comes from the matrix-react-sdk.
+    # https://github.com/matrix-org/matrix-react-sdk/blob/a70fcfd0bcf7f8c85986da18001ea11597989a7c/src/utils/blobs.ts#L51
+    # SVGs are *intentionally* omitted.
+    "image/jpeg",
+    "image/gif",
+    "image/png",
+    "image/apng",
+    "image/webp",
+    "image/avif",
+    "video/mp4",
+    "video/webm",
+    "video/ogg",
+    "video/quicktime",
+    "audio/mp4",
+    "audio/webm",
+    "audio/aac",
+    "audio/mpeg",
+    "audio/ogg",
+    "audio/wave",
+    "audio/wav",
+    "audio/x-wav",
+    "audio/x-pn-wav",
+    "audio/flac",
+    "audio/x-flac",
+]
 
 
 def respond_404(request: SynapseRequest) -> None:
+    assert request.path is not None
     respond_with_json(
         request,
         404,
-        cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
+        cs_error("Not found '%s'" % (request.path.decode(),), code=Codes.NOT_FOUND),
         send_cors=True,
     )
 
@@ -152,6 +145,14 @@ def add_file_headers(
         content_type = media_type
 
     request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
+
+    # A strict subset of content types is allowed to be inlined so that they may
+    # be viewed directly in a browser. Other file types are forced to be downloads.
+    if media_type.lower() in INLINE_CONTENT_TYPES:
+        disposition = "inline"
+    else:
+        disposition = "attachment"
+
     if upload_name:
         # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
         #
@@ -173,11 +174,17 @@ def add_file_headers(
         # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we
         # may as well just do the filename* version.
         if _can_encode_filename_as_token(upload_name):
-            disposition = "inline; filename=%s" % (upload_name,)
+            disposition = "%s; filename=%s" % (
+                disposition,
+                upload_name,
+            )
         else:
-            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
+            disposition = "%s; filename*=utf-8''%s" % (
+                disposition,
+                _quote(upload_name),
+            )
 
-        request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
+    request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
 
     # cache for at least a day.
     # XXX: we might want to turn this off for data we don't want to
@@ -325,7 +332,7 @@ class ThumbnailInfo:
     # Content type of thumbnail, e.g. image/png
     type: str
     # The size of the media file, in bytes.
-    length: Optional[int] = None
+    length: int
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
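Note: a rough, standalone sketch (not part of the patch) of the Content-Disposition behaviour introduced above; the helper name and the abridged allowlist are illustrative only.

    import urllib.parse

    # Abridged allowlist; the full list is INLINE_CONTENT_TYPES in _base.py above.
    INLINE_CONTENT_TYPES = {"text/plain", "image/png", "video/mp4"}

    def content_disposition_for(media_type, upload_name=None):
        # Only allowlisted types may render inline; everything else becomes a download.
        disposition = "inline" if media_type.lower() in INLINE_CONTENT_TYPES else "attachment"
        if upload_name:
            disposition += "; filename*=utf-8''" + urllib.parse.quote(upload_name)
        return disposition

    print(content_disposition_for("image/png", "cat.png"))
    # inline; filename*=utf-8''cat.png
    print(content_disposition_for("image/svg+xml", "evil.svg"))
    # attachment; filename*=utf-8''evil.svg  (SVG is deliberately not in the allowlist)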
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index e81c987b10..7fd46901f7 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -35,6 +35,7 @@ from synapse.api.errors import (
 from synapse.config.repository import ThumbnailRequirement
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
+from synapse.logging.opentracing import trace
 from synapse.media._base import (
     FileInfo,
     Responder,
@@ -47,6 +48,7 @@ from synapse.media.filepath import MediaFilePaths
 from synapse.media.media_storage import MediaStorage
 from synapse.media.storage_provider import StorageProviderWrapper
 from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
+from synapse.media.url_previewer import UrlPreviewer
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.types import UserID
 from synapse.util.async_helpers import Linearizer
@@ -113,7 +115,7 @@ class MediaRepository:
             )
             storage_providers.append(provider)
 
-        self.media_storage = MediaStorage(
+        self.media_storage: MediaStorage = MediaStorage(
             self.hs, self.primary_base_path, self.filepaths, storage_providers
         )
 
@@ -141,6 +143,13 @@ class MediaRepository:
                 MEDIA_RETENTION_CHECK_PERIOD_MS,
             )
 
+        if hs.config.media.url_preview_enabled:
+            self.url_previewer: Optional[UrlPreviewer] = UrlPreviewer(
+                hs, self, self.media_storage
+            )
+        else:
+            self.url_previewer = None
+
     def _start_update_recently_accessed(self) -> Deferred:
         return run_as_background_process(
             "update_recently_accessed_media", self._update_recently_accessed
@@ -174,6 +183,7 @@ class MediaRepository:
         else:
             self.recently_accessed_locals.add(media_id)
 
+    @trace
     async def create_content(
         self,
         media_type: str,
@@ -212,7 +222,10 @@ class MediaRepository:
             user_id=auth_user,
         )
 
-        await self._generate_thumbnails(None, media_id, media_id, media_type)
+        try:
+            await self._generate_thumbnails(None, media_id, media_id, media_type)
+        except Exception as e:
+            logger.info("Failed to generate thumbnails: %s", e)
 
         return MXCUri(self.server_name, media_id)
 
@@ -611,6 +624,7 @@ class MediaRepository:
                     height=t_height,
                     method=t_method,
                     type=t_type,
+                    length=t_byte_source.tell(),
                 ),
             )
 
@@ -681,6 +695,7 @@ class MediaRepository:
                     height=t_height,
                     method=t_method,
                     type=t_type,
+                    length=t_byte_source.tell(),
                 ),
             )
 
@@ -710,6 +725,7 @@ class MediaRepository:
             # Could not generate thumbnail.
             return None
 
+    @trace
     async def _generate_thumbnails(
         self,
         server_name: Optional[str],
@@ -825,6 +841,7 @@ class MediaRepository:
                     height=t_height,
                     method=t_method,
                     type=t_type,
+                    length=t_byte_source.tell(),
                 ),
            )
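Note: a small illustration (not from the patch) of why the new `length=t_byte_source.tell()` arguments give the thumbnail size — the thumbnailer writes the encoded bytes into a fresh in-memory stream, so the position after writing equals the byte count.

    from io import BytesIO

    buf = BytesIO()
    buf.write(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)  # stand-in for an encoded thumbnail
    # The stream position after writing equals the number of bytes written,
    # which is what the now-required ThumbnailInfo.length field records.
    print(buf.tell())  # 108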
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
index a819d95407..a17ccb3d80 100644
--- a/synapse/media/media_storage.py
+++ b/synapse/media/media_storage.py
@@ -38,6 +38,7 @@ from twisted.protocols.basic import FileSender
 
 from synapse.api.errors import NotFoundError
 from synapse.logging.context import defer_to_thread, make_deferred_yieldable
+from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
 from synapse.util import Clock
 from synapse.util.file_consumer import BackgroundFileConsumer
 
@@ -76,6 +77,7 @@ class MediaStorage:
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.clock = hs.get_clock()
 
+    @trace_with_opname("MediaStorage.store_file")
     async def store_file(self, source: IO, file_info: FileInfo) -> str:
         """Write `source` to the on disk media store, and also any other
         configured storage providers
@@ -89,16 +91,19 @@ class MediaStorage:
         """
 
         with self.store_into_file(file_info) as (f, fname, finish_cb):
-            # Write to the main repository
+            # Write to the main media repository
             await self.write_to_file(source, f)
+            # Write to the other storage providers
             await finish_cb()
 
         return fname
 
+    @trace_with_opname("MediaStorage.write_to_file")
     async def write_to_file(self, source: IO, output: IO) -> None:
         """Asynchronously write the `source` to `output`."""
         await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
 
+    @trace_with_opname("MediaStorage.store_into_file")
     @contextlib.contextmanager
     def store_into_file(
         self, file_info: FileInfo
@@ -113,9 +118,9 @@ class MediaStorage:
         fname can be used to read the contents from after upload, e.g. to generate
         thumbnails.
 
-        finish_cb must be called and waited on after the file has been
-        successfully been written to. Should not be called if there was an
-        error.
+        finish_cb must be called and waited on after the file has been successfully been
+        written to. Should not be called if there was an error. Checks for spam and
+        stores the file into the configured storage providers.
 
         Args:
             file_info: Info about the file to store
@@ -135,35 +140,48 @@ class MediaStorage:
         finished_called = [False]
 
+        main_media_repo_write_trace_scope = start_active_span(
+            "writing to main media repo"
+        )
+        main_media_repo_write_trace_scope.__enter__()
+
         try:
             with open(fname, "wb") as f:
 
                 async def finish() -> None:
-                    # Ensure that all writes have been flushed and close the
-                    # file.
-                    f.flush()
-                    f.close()
-
-                    spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
-                        ReadableFileWrapper(self.clock, fname), file_info
-                    )
-                    if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
-                        logger.info("Blocking media due to spam checker")
-                        # Note that we'll delete the stored media, due to the
-                        # try/except below. The media also won't be stored in
-                        # the DB.
-                        # We currently ignore any additional field returned by
-                        # the spam-check API.
-                        raise SpamMediaException(errcode=spam_check[0])
-
-                    for provider in self.storage_providers:
-                        await provider.store_file(path, file_info)
-
-                    finished_called[0] = True
+                    # When someone calls finish, we assume they are done writing to the main media repo
+                    main_media_repo_write_trace_scope.__exit__(None, None, None)
+
+                    with start_active_span("writing to other storage providers"):
+                        # Ensure that all writes have been flushed and close the
+                        # file.
+                        f.flush()
+                        f.close()
+
+                        spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
+                            ReadableFileWrapper(self.clock, fname), file_info
+                        )
+                        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
+                            logger.info("Blocking media due to spam checker")
+                            # Note that we'll delete the stored media, due to the
+                            # try/except below. The media also won't be stored in
+                            # the DB.
+                            # We currently ignore any additional field returned by
+                            # the spam-check API.
+                            raise SpamMediaException(errcode=spam_check[0])
+
+                        for provider in self.storage_providers:
+                            with start_active_span(str(provider)):
+                                await provider.store_file(path, file_info)
+
+                    finished_called[0] = True
 
                 yield f, fname, finish
 
         except Exception as e:
             try:
+                main_media_repo_write_trace_scope.__exit__(
+                    type(e), None, e.__traceback__
+                )
                 os.remove(fname)
             except Exception:
                 pass
@@ -171,7 +189,11 @@ class MediaStorage:
             raise e from None
 
         if not finished_called:
-            raise Exception("Finished callback not called")
+            exc = Exception("Finished callback not called")
+            main_media_repo_write_trace_scope.__exit__(
+                type(exc), None, exc.__traceback__
+            )
+            raise exc
 
     async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """Attempts to fetch media described by file_info from the local cache
@@ -214,6 +236,7 @@ class MediaStorage:
 
         return None
 
+    @trace
     async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
         """Ensures that the given file is in the local cache. Attempts to
         download it from storage providers if it isn't.
@@ -259,6 +282,7 @@ class MediaStorage:
 
         raise NotFoundError()
 
+    @trace
     def _file_info_to_path(self, file_info: FileInfo) -> str:
         """Converts file_info into a relative path.
 
@@ -301,6 +325,7 @@ class MediaStorage:
         return self.filepaths.local_media_filepath_rel(file_info.file_id)
 
 
+@trace
 def _write_file_synchronously(source: IO, dest: IO) -> None:
     """Write `source` to the file like `dest` synchronously. Should be called
     from a thread.
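Note: the patch drives the main-repo span by hand (`__enter__`/`__exit__`) rather than with a `with` block because the span must stay open across the context manager's yield and be closed later from inside the nested finish() callback. A toy sketch of that shape, using a stand-in context manager rather than the real opentracing scope:

    from contextlib import contextmanager

    @contextmanager
    def span(name):  # stand-in for synapse.logging.opentracing.start_active_span
        print("start", name)
        try:
            yield
        finally:
            print("finish", name)

    scope = span("writing to main media repo")
    scope.__enter__()                 # opened when store_into_file() sets up the file
    # ... caller writes to the file here ...
    scope.__exit__(None, None, None)  # closed from finish(), or with exception info on failure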
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py
index c0eaf04be5..2ce842c98d 100644
--- a/synapse/media/oembed.py
+++ b/synapse/media/oembed.py
@@ -14,7 +14,7 @@
 import html
 import logging
 import urllib.parse
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, Optional, cast
 
 import attr
 
@@ -98,7 +98,7 @@ class OEmbedProvider:
         # No match.
         return None
 
-    def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]:
+    def autodiscover_from_html(self, tree: "etree._Element") -> Optional[str]:
         """
         Search an HTML document for oEmbed autodiscovery information.
 
@@ -109,18 +109,22 @@ class OEmbedProvider:
             The URL to use for oEmbed information, or None if no URL was found.
         """
         # Search for link elements with the proper rel and type attributes.
-        for tag in tree.xpath(
-            "//link[@rel='alternate'][@type='application/json+oembed']"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        for tag in cast(
+            List["etree._Element"],
+            tree.xpath("//link[@rel='alternate'][@type='application/json+oembed']"),
         ):
             if "href" in tag.attrib:
-                return tag.attrib["href"]
+                return cast(str, tag.attrib["href"])
 
         # Some providers (e.g. Flickr) use alternative instead of alternate.
-        for tag in tree.xpath(
-            "//link[@rel='alternative'][@type='application/json+oembed']"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        for tag in cast(
+            List["etree._Element"],
+            tree.xpath("//link[@rel='alternative'][@type='application/json+oembed']"),
         ):
             if "href" in tag.attrib:
-                return tag.attrib["href"]
+                return cast(str, tag.attrib["href"])
 
         return None
 
@@ -200,7 +204,7 @@ class OEmbedProvider:
                 calc_description_and_urls(open_graph_response, oembed["html"])
             for size in ("width", "height"):
                 val = oembed.get(size)
-                if type(val) is int:
+                if type(val) is int:  # noqa: E721
                     open_graph_response[f"og:video:{size}"] = val
 
         elif oembed_type == "link":
@@ -212,11 +216,12 @@ class OEmbedProvider:
         return OEmbedResult(open_graph_response, author_name, cache_age)
 
 
-def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
+def _fetch_urls(tree: "etree._Element", tag_name: str) -> List[str]:
     results = []
-    for tag in tree.xpath("//*/" + tag_name):
+    # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+    for tag in cast(List["etree._Element"], tree.xpath("//*/" + tag_name)):
         if "src" in tag.attrib:
-            results.append(tag.attrib["src"])
+            results.append(cast(str, tag.attrib["src"]))
 
     return results
 
@@ -244,11 +249,12 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
     parser = etree.HTMLParser(recover=True, encoding="utf-8")
 
     # Attempt to parse the body. If this fails, log and return no metadata.
-    tree = etree.fromstring(html_body, parser)
+    # TODO Develop of lxml-stubs has this correct.
+    tree = etree.fromstring(html_body, parser)  # type: ignore[arg-type]
 
     # The data was successfully parsed, but no tree was found.
     if tree is None:
-        return
+        return  # type: ignore[unreachable]
 
     # Attempt to find interesting URLs (images, videos, embeds).
     if "og:image" not in open_graph_response:
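Note: a standalone sketch of the `cast()` pattern used above (and throughout preview_html.py below) — lxml's xpath() is typed as returning a union that depends on the expression, so the result is narrowed purely for mypy's benefit. The sample document is made up.

    from typing import List, cast
    from lxml import etree

    html = b"<html><body><img src='a.png' width='100' height='100'/></body></html>"
    tree = etree.fromstring(html, etree.HTMLParser())
    # xpath() may return bool/float/str/list depending on the expression, so the
    # code narrows the result with cast(); this has no effect at runtime.
    images = cast(List[etree._Element], tree.xpath("//img[@src]"))
    print(cast(str, images[0].attrib["src"]))  # a.png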
diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py
index 516d0434f0..1bc7ccb7f3 100644
--- a/synapse/media/preview_html.py
+++ b/synapse/media/preview_html.py
@@ -24,6 +24,7 @@ from typing import (
     Optional,
     Set,
     Union,
+    cast,
 )
 
 if TYPE_CHECKING:
@@ -115,7 +116,7 @@ def _get_html_media_encodings(
 
 def decode_body(
     body: bytes, uri: str, content_type: Optional[str] = None
-) -> Optional["etree.Element"]:
+) -> Optional["etree._Element"]:
     """
     This uses lxml to parse the HTML document.
 
@@ -152,11 +153,12 @@ def decode_body(
 
     # Attempt to parse the body. Returns None if the body was successfully
     # parsed, but no tree was found.
-    return etree.fromstring(body, parser)
+    # TODO Develop of lxml-stubs has this correct.
+    return etree.fromstring(body, parser)  # type: ignore[arg-type]
 
 
 def _get_meta_tags(
-    tree: "etree.Element",
+    tree: "etree._Element",
     property: str,
     prefix: str,
     property_mapper: Optional[Callable[[str], Optional[str]]] = None,
@@ -175,9 +177,15 @@ def _get_meta_tags(
     Returns:
         A map of tag name to value.
     """
+    # This actually returns Dict[str, str], but the caller sets this as a variable
+    # which is Dict[str, Optional[str]].
    results: Dict[str, Optional[str]] = {}
-    for tag in tree.xpath(
-        f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
+    # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+    for tag in cast(
+        List["etree._Element"],
+        tree.xpath(
+            f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
+        ),
     ):
         # if we've got more than 50 tags, someone is taking the piss
         if len(results) >= 50:
@@ -187,14 +195,15 @@ def _get_meta_tags(
             )
             return {}
 
-        key = tag.attrib[property]
+        key = cast(str, tag.attrib[property])
         if property_mapper:
-            key = property_mapper(key)
+            new_key = property_mapper(key)
             # None is a special value used to ignore a value.
-            if key is None:
+            if new_key is None:
                 continue
+            key = new_key
 
-        results[key] = tag.attrib["content"]
+        results[key] = cast(str, tag.attrib["content"])
 
     return results
 
@@ -219,7 +228,7 @@ def _map_twitter_to_open_graph(key: str) -> Optional[str]:
     return "og" + key[7:]
 
 
-def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
+def parse_html_to_open_graph(tree: "etree._Element") -> Dict[str, Optional[str]]:
     """
     Parse the HTML document into an Open Graph response.
 
@@ -276,24 +285,36 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]
 
     if "og:title" not in og:
         # Attempt to find a title from the title tag, or the biggest header on the page.
-        title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        title = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()"),
+        )
         if title:
             og["og:title"] = title[0].strip()
         else:
             og["og:title"] = None
 
     if "og:image" not in og:
-        meta_image = tree.xpath(
-            "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        meta_image = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath(
+                "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
+            ),
         )
         # If a meta image is found, use it.
         if meta_image:
             og["og:image"] = meta_image[0]
         else:
             # Try to find images which are larger than 10px by 10px.
+            # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
             #
             # TODO: consider inlined CSS styles as well as width & height attribs
-            images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
+            images = cast(
+                List["etree._Element"],
+                tree.xpath("//img[@src][number(@width)>10][number(@height)>10]"),
+            )
             images = sorted(
                 images,
                 key=lambda i: (
@@ -302,20 +323,29 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]
             )
             # If no images were found, try to find *any* images.
             if not images:
-                images = tree.xpath("//img[@src][1]")
+                # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+                images = cast(List["etree._Element"], tree.xpath("//img[@src][1]"))
             if images:
-                og["og:image"] = images[0].attrib["src"]
+                og["og:image"] = cast(str, images[0].attrib["src"])
 
             # Finally, fallback to the favicon if nothing else.
             else:
-                favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
+                # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+                favicons = cast(
+                    List["etree._ElementUnicodeResult"],
+                    tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]"),
+                )
                 if favicons:
                     og["og:image"] = favicons[0]
 
     if "og:description" not in og:
         # Check the first meta description tag for content.
-        meta_description = tree.xpath(
-            "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        meta_description = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath(
+                "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
+            ),
         )
         # If a meta description is found with content, use it.
         if meta_description:
@@ -332,7 +362,7 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]
     return og
 
 
-def parse_html_description(tree: "etree.Element") -> Optional[str]:
+def parse_html_description(tree: "etree._Element") -> Optional[str]:
     """
     Calculate a text description based on an HTML document.
 
@@ -368,6 +398,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
         "canvas",
         "img",
         "picture",
+        # etree.Comment is a function which creates an etree._Comment element.
+        # The "tag" attribute of an etree._Comment instance is confusingly the
+        # etree.Comment function instead of a string.
         etree.Comment,
     }
 
@@ -381,8 +414,8 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
 
 
 def _iterate_over_text(
-    tree: Optional["etree.Element"],
-    tags_to_ignore: Set[Union[str, "etree.Comment"]],
+    tree: Optional["etree._Element"],
+    tags_to_ignore: Set[object],
     stack_limit: int = 1024,
 ) -> Generator[str, None, None]:
     """Iterate over the tree returning text nodes in a depth first fashion,
@@ -402,7 +435,7 @@ def _iterate_over_text(
 
     # This is a stack whose items are elements to iterate over *or* strings
    # to be returned.
-    elements: List[Union[str, "etree.Element"]] = [tree]
+    elements: List[Union[str, "etree._Element"]] = [tree]
     while elements:
         el = elements.pop()
 
diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py
index 1c9b71d69c..70a45cfd5b 100644
--- a/synapse/media/storage_provider.py
+++ b/synapse/media/storage_provider.py
@@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
 
 from synapse.config._base import Config
 from synapse.logging.context import defer_to_thread, run_in_background
+from synapse.logging.opentracing import start_active_span, trace_with_opname
 from synapse.util.async_helpers import maybe_awaitable
 
 from ._base import FileInfo, Responder
@@ -86,6 +87,7 @@ class StorageProviderWrapper(StorageProvider):
     def __str__(self) -> str:
         return "StorageProviderWrapper[%s]" % (self.backend,)
 
+    @trace_with_opname("StorageProviderWrapper.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         if not file_info.server_name and not self.store_local:
             return None
@@ -114,6 +116,7 @@ class StorageProviderWrapper(StorageProvider):
 
         run_in_background(store)
 
+    @trace_with_opname("StorageProviderWrapper.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         if file_info.url_cache:
             # Files in the URL preview cache definitely aren't stored here,
@@ -141,6 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
     def __str__(self) -> str:
         return "FileStorageProviderBackend[%s]" % (self.base_directory,)
 
+    @trace_with_opname("FileStorageProviderBackend.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         """See StorageProvider.store_file"""
 
@@ -152,13 +156,15 @@ class FileStorageProviderBackend(StorageProvider):
 
         # mypy needs help inferring the type of the second parameter, which is generic
         shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
-        await defer_to_thread(
-            self.hs.get_reactor(),
-            shutil_copyfile,
-            primary_fname,
-            backup_fname,
-        )
-
+        with start_active_span("shutil_copyfile"):
+            await defer_to_thread(
+                self.hs.get_reactor(),
+                shutil_copyfile,
+                primary_fname,
+                backup_fname,
+            )
+
+    @trace_with_opname("FileStorageProviderBackend.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """See StorageProvider.fetch"""
 
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index f909a4fb9a..d8979813b3 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -19,6 +19,8 @@ from typing import Optional, Tuple, Type
 
 from PIL import Image
 
+from synapse.logging.opentracing import trace
+
 logger = logging.getLogger(__name__)
 
 EXIF_ORIENTATION_TAG = 0x0112
@@ -76,12 +78,13 @@ class Thumbnailer:
             image_exif = self.image._getexif()  # type: ignore
             if image_exif is not None:
                 image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
-                assert type(image_orientation) is int
+                assert type(image_orientation) is int  # noqa: E721
                 self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
         except Exception as e:
             # A lot of parsing errors can happen when parsing EXIF
             logger.info("Error parsing image EXIF information: %s", e)
 
+    @trace
     def transpose(self) -> Tuple[int, int]:
         """Transpose the image using its EXIF Orientation tag
 
@@ -131,8 +134,9 @@ class Thumbnailer:
         else:
             with self.image:
                 self.image = self.image.convert("RGB")
-        return self.image.resize((width, height), Image.ANTIALIAS)
+        return self.image.resize((width, height), Image.LANCZOS)
 
+    @trace
     def scale(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales the image to the given dimensions.
 
@@ -142,6 +146,7 @@ class Thumbnailer:
         with self._resize(width, height) as scaled:
             return self._encode_image(scaled, output_type)
 
+    @trace
     def crop(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales and crops the image to the given dimensions preserving
         aspect::
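Note: the `Image.ANTIALIAS` to `Image.LANCZOS` swap in thumbnailer.py tracks Pillow's removal of the `ANTIALIAS` alias (deprecated in 9.1, removed in 10.0); both names refer to the same Lanczos resampling filter. A minimal sketch, assuming any recent Pillow:

    from PIL import Image

    img = Image.new("RGB", (640, 480))
    # Image.ANTIALIAS no longer exists on Pillow >= 10; LANCZOS is the same filter.
    thumb = img.resize((64, 48), Image.LANCZOS)
    print(thumb.size)  # (64, 48)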
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index 70b32cee17..9b5a3dd5f4 100644
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -846,9 +846,7 @@ def _is_media(content_type: str) -> bool:
 
 def _is_html(content_type: str) -> bool:
     content_type = content_type.lower()
-    return content_type.startswith("text/html") or content_type.startswith(
-        "application/xhtml"
-    )
+    return content_type.startswith(("text/html", "application/xhtml"))
 
 
 def _is_json(content_type: str) -> bool:
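Note: the url_previewer.py simplification relies on `str.startswith()` accepting a tuple of prefixes; a standalone version of the new helper behaves like this:

    def _is_html(content_type: str) -> bool:
        # startswith() accepts a tuple of prefixes, so the two chained calls
        # collapse into one.
        return content_type.lower().startswith(("text/html", "application/xhtml"))

    print(_is_html("application/xhtml+xml"))  # True
    print(_is_html("application/json"))       # False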