summary refs log tree commit diff
path: root/synapse/media/storage_provider.py
blob: a2d50adf6586e6e2c3c945044da36a106baa13c9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#

import abc
import logging
import os
import shutil
from typing import TYPE_CHECKING, Callable, Optional
from uuid import uuid4

from synapse.config._base import Config
from synapse.logging.context import defer_to_thread, run_in_background
from synapse.logging.opentracing import start_active_span, trace_with_opname
from synapse.util.async_helpers import maybe_awaitable

from ..storage.databases.main.media_repository import LocalMedia
from ._base import FileInfo, Responder
from .media_storage import FileResponder, MultipartResponder

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# HomeServer is only needed for annotations in this module (it is referenced
# as the string "HomeServer"), so it is imported solely when type checking.
if TYPE_CHECKING:
    from synapse.server import HomeServer


class StorageProvider(metaclass=abc.ABCMeta):
    """Abstract interface for a service that can persist uploaded media and
    hand it back on request.

    Concrete providers must implement both `store_file` and `fetch`.
    """

    @abc.abstractmethod
    async def store_file(self, path: str, file_info: FileInfo) -> None:
        """Persist the media file identified by `path` and `file_info`.

        Args:
            path: Relative path of file in local cache
            file_info: The metadata of the file.
        """

    @abc.abstractmethod
    async def fetch(
        self,
        path: str,
        file_info: FileInfo,
        media_info: Optional[LocalMedia] = None,
        federation: bool = False,
    ) -> Optional[Responder]:
        """Look up the file described by `path` and `file_info` and, if this
        provider has it, return an object that can respond with its contents.

        Args:
            path: Relative path of file in local cache
            file_info: The metadata of the file.
            media_info: metadata of the media item
            federation: Whether the requested media is for a federation request

        Returns:
            A Responder if the provider has the file, otherwise None.
        """


class StorageProviderWrapper(StorageProvider):
    """Wraps another storage provider and applies configured policy about
    which files are stored and whether storing blocks the caller.

    Args:
        backend: The storage provider to wrap.
        store_local: Whether to store new local files or not.
        store_synchronous: Whether to wait for the file to be successfully
            uploaded, or to do the upload in the background.
        store_remote: Whether remote media should be uploaded
    """

    def __init__(
        self,
        backend: StorageProvider,
        store_local: bool,
        store_synchronous: bool,
        store_remote: bool,
    ):
        self.backend = backend
        self.store_synchronous = store_synchronous
        self.store_local = store_local
        self.store_remote = store_remote

    def __str__(self) -> str:
        wrapped = (self.backend,)
        return "StorageProviderWrapper[%s]" % wrapped

    @trace_with_opname("StorageProviderWrapper.store_file")
    async def store_file(self, path: str, file_info: FileInfo) -> None:
        """Forward the file to the wrapped backend, subject to the configured
        policy.

        Nothing is stored when the matching `store_local` / `store_remote`
        flag is off, or when the file belongs to the URL preview cache.

        Args:
            path: Relative path of file in local cache
            file_info: The metadata of the file.
        """
        # A file with a `server_name` is governed by `store_remote`; one
        # without is governed by `store_local`.
        if file_info.server_name:
            wanted = self.store_remote
        else:
            wanted = self.store_local

        # The URL preview cache is short lived and not worth offloading or
        # backing up.
        if not wanted or file_info.url_cache:
            return None

        if not self.store_synchronous:
            # Nobody awaits the background upload, so failures are logged
            # here rather than propagated to the caller.
            async def store() -> None:
                try:
                    return await maybe_awaitable(
                        self.backend.store_file(path, file_info)
                    )
                except Exception:
                    logger.exception("Error storing file")

            run_in_background(store)
            return None

        # store_file is supposed to return an Awaitable, but guard
        # against improper implementations.
        await maybe_awaitable(self.backend.store_file(path, file_info))  # type: ignore

    @trace_with_opname("StorageProviderWrapper.fetch")
    async def fetch(
        self,
        path: str,
        file_info: FileInfo,
        media_info: Optional[LocalMedia] = None,
        federation: bool = False,
    ) -> Optional[Responder]:
        """Fetch the file from the wrapped backend.

        Args:
            path: Relative path of file in local cache
            file_info: The metadata of the file.
            media_info: metadata of the media item
            federation: Whether the requested media is for a federation request

        Returns:
            Whatever the backend returns, or None without consulting the
            backend for URL preview cache files.
        """
        if file_info.url_cache:
            # Files in the URL preview cache definitely aren't stored here,
            # so avoid any potentially slow I/O or network access.
            return None

        # fetch is supposed to return an Awaitable, but guard
        # against improper implementations.
        pending = self.backend.fetch(path, file_info, media_info, federation)
        return await maybe_awaitable(pending)


class FileStorageProviderBackend(StorageProvider):
    """A storage provider that stores files in a directory on a filesystem.

    Args:
        hs: The homeserver; supplies the media store path and the reactor.
        config: The config returned by `parse_config`.
    """

    def __init__(self, hs: "HomeServer", config: str):
        self.hs = hs
        # Directory that `store_file` copies from.
        self.cache_directory = hs.config.media.media_store_path
        # Directory that `store_file` copies to and `fetch` reads from.
        self.base_directory = config

    def __str__(self) -> str:
        return "FileStorageProviderBackend[%s]" % (self.base_directory,)

    @trace_with_opname("FileStorageProviderBackend.store_file")
    async def store_file(self, path: str, file_info: FileInfo) -> None:
        """See StorageProvider.store_file

        Copies `<cache_directory>/<path>` to `<base_directory>/<path>`,
        creating any missing parent directories of the destination first.
        """

        primary_fname = os.path.join(self.cache_directory, path)
        backup_fname = os.path.join(self.base_directory, path)

        dirname = os.path.dirname(backup_fname)
        os.makedirs(dirname, exist_ok=True)

        # mypy needs help inferring the type of the second parameter, which is generic
        shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
        with start_active_span("shutil_copyfile"):
            # The copy itself is handed to `defer_to_thread` instead of being
            # called directly here.
            await defer_to_thread(
                self.hs.get_reactor(),
                shutil_copyfile,
                primary_fname,
                backup_fname,
            )

    @trace_with_opname("FileStorageProviderBackend.fetch")
    async def fetch(
        self,
        path: str,
        file_info: FileInfo,
        media_info: Optional[LocalMedia] = None,
        federation: bool = False,
    ) -> Optional[Responder]:
        """See StorageProvider.fetch

        Returns:
            A MultipartResponder (for federation requests) or a FileResponder
            wrapping the opened file, or None if `<base_directory>/<path>` is
            not a regular file, including when it is removed before it can be
            opened.
        """

        backup_fname = os.path.join(self.base_directory, path)
        if not os.path.isfile(backup_fname):
            return None

        # The file can disappear between the check above and the open() calls
        # below (e.g. a concurrent deletion). Treat that the same as not having
        # the file, rather than letting FileNotFoundError escape to the caller.
        try:
            if federation:
                # Federation responses are built from the media metadata.
                assert media_info is not None
                boundary = uuid4().hex.encode("ascii")
                return MultipartResponder(
                    open(backup_fname, "rb"), media_info, boundary
                )
            return FileResponder(open(backup_fname, "rb"))
        except FileNotFoundError:
            return None

    @staticmethod
    def parse_config(config: dict) -> str:
        """Called on startup to parse config supplied. This should parse
        the config and raise if there is a problem.

        The returned value is passed into the constructor.

        In this case we only care about a single param, the directory, so let's
        just pull that out.

        Args:
            config: The provider's config section; must contain "directory".

        Returns:
            The result of `Config.ensure_directory` for the configured
            directory.

        Raises:
            KeyError: If "directory" is missing from `config`.
        """
        return Config.ensure_directory(config["directory"])