From 836d5c44b6457a93c0d163e721ded153382d2a79 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 22 May 2017 21:14:20 +0100 Subject: actually trim oversize og:description meta --- synapse/rest/media/v1/preview_url_resource.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 99760d622f..c680fddab5 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -434,6 +434,8 @@ def _calc_og(tree, media_uri): for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) ) og['og:description'] = summarize_paragraphs(text_nodes) + else: + og['og:description'] = summarize_paragraphs([og['og:description']]) # TODO: delete the url downloads to stop diskfilling, # as we only ever cared about its OG -- cgit 1.4.1 From 48d29494165f2b12e2d2b6206766828e2a542ef4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 10:23:14 +0100 Subject: Throw exception when not retrying when downloading media --- synapse/rest/media/v1/media_repository.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index caca96c222..bae2b4c757 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -184,6 +184,7 @@ class MediaRepository(object): raise except NotRetryingDestination: logger.warn("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") except Exception: logger.exception("Failed to fetch remote media %s/%s", server_name, media_id) -- cgit 1.4.1 From b8b936a6eab46cec2460fb723124bb3a750d3c83 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 17:39:21 +0100 Subject: Add API to quarantine media --- synapse/rest/client/v1/admin.py | 25 ++++++++ synapse/rest/media/v1/download_resource.py | 2 +- synapse/rest/media/v1/media_repository.py | 2 + synapse/rest/media/v1/thumbnail_resource.py | 4 +- synapse/storage/media_repository.py | 4 +- synapse/storage/room.py | 70 ++++++++++++++++++++++ .../storage/schema/delta/43/quarantine_media.sql | 17 ++++++ 7 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/43/quarantine_media.sql (limited to 'synapse/rest/media') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index aaa3dffb1b..7d786e8de3 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -270,6 +270,30 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): })) +class QuarantineMediaInRoom(ClientV1RestServlet): + """Quarantines all media in a room so that no one can download it via + this server. + """ + PATTERNS = client_path_patterns("/admin/quarantine_media/(?P[^/]+)") + + def __init__(self, hs): + super(QuarantineMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_POST(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + num_quarantined = yield self.store.quarantine_media_ids_in_room( + room_id, requester.user.to_string(), + ) + + defer.returnValue((200, {"num_quarantined": num_quarantined})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This need a user have a administrator access in Synapse. @@ -467,3 +491,4 @@ def register_servlets(hs, http_server): GetUsersPaginatedRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) + QuarantineMediaInRoom(hs).register(http_server) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6788375e85..39a286b83c 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -66,7 +66,7 @@ class DownloadResource(Resource): @defer.inlineCallbacks def _respond_local_file(self, request, media_id, name): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index bae2b4c757..0718f75241 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -135,6 +135,8 @@ class MediaRepository(object): media_info = yield self._download_remote_file( server_name, media_id ) + elif media_info["quarantined_by"]: + raise NotFoundError() else: self.recently_accessed_remotes.add((server_name, media_id)) yield self.store.update_cached_last_access_time( diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index d8f54adc99..59b2c39b2f 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -81,7 +81,7 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return @@ -117,7 +117,7 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 4c0f82353d..5f0f18ee66 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -30,7 +30,7 @@ class MediaRepositoryStore(SQLBaseStore): return self._simple_select_one( "local_media_repository", {"media_id": media_id}, - ("media_type", "media_length", "upload_name", "created_ts"), + ("media_type", "media_length", "upload_name", "created_ts", "quarantined_by"), allow_none=True, desc="get_local_media", ) @@ -138,7 +138,7 @@ class MediaRepositoryStore(SQLBaseStore): {"media_origin": origin, "media_id": media_id}, ( "media_type", "media_length", "upload_name", "created_ts", - "filesystem_id", + "filesystem_id", "quarantined_by", ), allow_none=True, desc="get_cached_remote_media", diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 07366f66b6..e9c1549c00 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -24,6 +24,7 @@ from .engines import PostgresEngine, Sqlite3Engine import collections import logging import ujson as json +import re logger = logging.getLogger(__name__) @@ -531,3 +532,72 @@ class RoomStore(SQLBaseStore): desc="block_room", ) self.is_room_blocked.invalidate((room_id,)) + + def quarantine_media_ids_in_room(self, room_id, quarantined_by): + """For a room loops through all events with media and quarantines + the associated media + """ + def _get_media_ids_in_room(txn): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = self.get_current_events_token() + 1 + + total_media_quarantined = 0 + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + local_media_mxcs = [] + remote_media_mxcs = [] + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + url = content.get("url") + if not url: + continue + + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + # Now update all the tables to set the quarantined_by flag + + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? + """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + + txn.executemany( + """ + UPDATE remote_media_cache + SET quarantined_by = ? + WHERE media_origin AND media_id = ? + """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs + ) + ) + + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) + + return total_media_quarantined + + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) diff --git a/synapse/storage/schema/delta/43/quarantine_media.sql b/synapse/storage/schema/delta/43/quarantine_media.sql new file mode 100644 index 0000000000..630907ec4f --- /dev/null +++ b/synapse/storage/schema/delta/43/quarantine_media.sql @@ -0,0 +1,17 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE local_media_repository ADD COLUMN quarantined_by TEXT; +ALTER TABLE remote_media_cache ADD COLUMN quarantined_by TEXT; -- cgit 1.4.1 From 7fe8ed1787ca0c1f7a4c56aa1c85e486b76f4550 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Jun 2017 11:14:11 +0100 Subject: Store URL cache preview downloads seperately This makes it easier to clear old media out at a later date --- synapse/rest/media/v1/download_resource.py | 7 ++++++- synapse/rest/media/v1/filepath.py | 18 ++++++++++++++++ synapse/rest/media/v1/media_repository.py | 30 ++++++++++++++++++++------- synapse/rest/media/v1/preview_url_resource.py | 7 ++++--- synapse/rest/media/v1/thumbnail_resource.py | 28 +++++++++++++++++++------ synapse/storage/media_repository.py | 8 +++++-- synapse/storage/schema/delta/43/url_cache.sql | 16 ++++++++++++++ 7 files changed, 94 insertions(+), 20 deletions(-) create mode 100644 synapse/storage/schema/delta/43/url_cache.sql (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 39a286b83c..6879249c8a 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -73,7 +73,12 @@ class DownloadResource(Resource): media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] - file_path = self.filepaths.local_media_filepath(media_id) + if media_info["url_cache"]: + # TODO: Check the file still exists, if it doesn't we can redownload + # it from the url `media_info["url_cache"]` + file_path = self.filepaths.url_cache_filepath(media_id) + else: + file_path = self.filepaths.local_media_filepath(media_id) yield respond_with_file( request, media_type, file_path, media_length, diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 0137458f71..d92b7ff337 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -71,3 +71,21 @@ class MediaFilePaths(object): self.base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) + + def url_cache_filepath(self, media_id): + return os.path.join( + self.base_path, "url_cache", + media_id[0:2], media_id[2:4], media_id[4:] + ) + + def url_cache_thumbnail(self, media_id, width, height, content_type, + method): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % ( + width, height, top_level_type, sub_type, method + ) + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + file_name + ) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0718f75241..0ea1248ce6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -326,13 +326,17 @@ class MediaRepository(object): defer.returnValue(t_path) @defer.inlineCallbacks - def _generate_local_thumbnails(self, media_id, media_info): + def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - input_path = self.filepaths.local_media_filepath(media_id) + if url_cache: + input_path = self.filepaths.url_cache_filepath(media_id) + else: + input_path = self.filepaths.local_media_filepath(media_id) + thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -360,9 +364,14 @@ class MediaRepository(object): for t_width, t_height, t_type in scales: t_method = "scale" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + if url_cache: + t_path = self.filepaths.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) self._makedirs(t_path) t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) @@ -377,9 +386,14 @@ class MediaRepository(object): # thumbnail. continue t_method = "crop" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + if url_cache: + t_path = self.filepaths.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) self._makedirs(t_path) t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) local_thumbnails.append(( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c680fddab5..b81a336c5d 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -164,7 +164,7 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): dims = yield self.media_repo._generate_local_thumbnails( - media_info['filesystem_id'], media_info + media_info['filesystem_id'], media_info, url_cache=True, ) og = { @@ -210,7 +210,7 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images dims = yield self.media_repo._generate_local_thumbnails( - image_info['filesystem_id'], image_info + image_info['filesystem_id'], image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] @@ -256,7 +256,7 @@ class PreviewUrlResource(Resource): # XXX: horrible duplication with base_resource's _download_remote_file() file_id = random_string(24) - fname = self.filepaths.local_media_filepath(file_id) + fname = self.filepaths.url_cache_filepath(file_id) self.media_repo._makedirs(fname) try: @@ -303,6 +303,7 @@ class PreviewUrlResource(Resource): upload_name=download_name, media_length=length, user_id=user, + url_cache=url, ) except Exception as e: diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 59b2c39b2f..68d56b2b10 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -101,9 +101,16 @@ class ThumbnailResource(Resource): t_type = thumbnail_info["thumbnail_type"] t_method = thumbnail_info["thumbnail_method"] - file_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) + if media_info["url_cache"]: + # TODO: Check the file still exists, if it doesn't we can redownload + # it from the url `media_info["url_cache"]` + file_path = self.filepaths.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method, + ) + else: + file_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method, + ) yield respond_with_file(request, t_type, file_path) else: @@ -134,9 +141,18 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - file_path = self.filepaths.local_media_thumbnail( - media_id, desired_width, desired_height, desired_type, desired_method, - ) + if media_info["url_cache"]: + # TODO: Check the file still exists, if it doesn't we can redownload + # it from the url `media_info["url_cache"]` + file_path = self.filepaths.url_cache_thumbnail( + media_id, desired_width, desired_height, desired_type, + desired_method, + ) + else: + file_path = self.filepaths.local_media_thumbnail( + media_id, desired_width, desired_height, desired_type, + desired_method, + ) yield respond_with_file(request, desired_type, file_path) return diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 5f0f18ee66..82bb61b811 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -30,13 +30,16 @@ class MediaRepositoryStore(SQLBaseStore): return self._simple_select_one( "local_media_repository", {"media_id": media_id}, - ("media_type", "media_length", "upload_name", "created_ts", "quarantined_by"), + ( + "media_type", "media_length", "upload_name", "created_ts", + "quarantined_by", "url_cache", + ), allow_none=True, desc="get_local_media", ) def store_local_media(self, media_id, media_type, time_now_ms, upload_name, - media_length, user_id): + media_length, user_id, url_cache=None): return self._simple_insert( "local_media_repository", { @@ -46,6 +49,7 @@ class MediaRepositoryStore(SQLBaseStore): "upload_name": upload_name, "media_length": media_length, "user_id": user_id.to_string(), + "url_cache": url_cache, }, desc="store_local_media", ) diff --git a/synapse/storage/schema/delta/43/url_cache.sql b/synapse/storage/schema/delta/43/url_cache.sql new file mode 100644 index 0000000000..45ebe020da --- /dev/null +++ b/synapse/storage/schema/delta/43/url_cache.sql @@ -0,0 +1,16 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE local_media_repository ADD COLUMN url_cache TEXT; -- cgit 1.4.1