From a9f90fa73af8bc324aa4fbe2ca920fe5f47c4fee Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2020 11:56:58 +0100 Subject: Type hints for RegistrationStore (#8615) --- synapse/storage/databases/main/__init__.py | 1 - synapse/storage/databases/main/registration.py | 156 +++++++++++++------------ 2 files changed, 83 insertions(+), 74 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 9b16f45f3e..43660ec4fb 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -146,7 +146,6 @@ class DataStore( db_conn, "e2e_cross_signing_keys", "stream_id" ) - self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id") diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 4c843b7679..b0329e17ec 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -16,29 +16,33 @@ # limitations under the License. import logging import re -from typing import Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError from synapse.metrics.background_process_metrics import wrap_as_background_process -from synapse.storage._base import SQLBaseStore from synapse.storage.database import DatabasePool -from synapse.storage.types import Cursor +from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore +from synapse.storage.databases.main.stats import StatsStore +from synapse.storage.types import Connection, Cursor +from synapse.storage.util.id_generators import IdGenerator from synapse.storage.util.sequence import build_sequence_generator from synapse.types import UserID from synapse.util.caches.descriptors import cached +if TYPE_CHECKING: + from synapse.server import HomeServer + THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 logger = logging.getLogger(__name__) -class RegistrationWorkerStore(SQLBaseStore): - def __init__(self, database: DatabasePool, db_conn, hs): +class RegistrationWorkerStore(CacheInvalidationWorkerStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.config = hs.config - self.clock = hs.get_clock() # Note: we don't check this sequence for consistency as we'd have to # call `find_max_generated_user_id_localpart` each time, which is @@ -55,7 +59,7 @@ class RegistrationWorkerStore(SQLBaseStore): # Create a background job for culling expired 3PID validity tokens if hs.config.run_background_tasks: - self.clock.looping_call( + self._clock.looping_call( self.cull_expired_threepid_validation_tokens, THIRTY_MINUTES_IN_MS ) @@ -92,7 +96,7 @@ class RegistrationWorkerStore(SQLBaseStore): if not info: return False - now = self.clock.time_msec() + now = self._clock.time_msec() trial_duration_ms = self.config.mau_trial_days * 24 * 60 * 60 * 1000 is_trial = (now - info["creation_ts"] * 1000) < trial_duration_ms return is_trial @@ -257,7 +261,7 @@ class RegistrationWorkerStore(SQLBaseStore): return await self.db_pool.runInteraction( "get_users_expiring_soon", select_users_txn, - self.clock.time_msec(), + self._clock.time_msec(), self.config.account_validity.renew_at, ) @@ -328,13 +332,17 @@ class RegistrationWorkerStore(SQLBaseStore): await self.db_pool.runInteraction("set_server_admin", set_server_admin_txn) def _query_for_auth(self, txn, token): - sql = ( - "SELECT users.name, users.is_guest, users.shadow_banned, access_tokens.id as token_id," - " access_tokens.device_id, access_tokens.valid_until_ms" - " FROM users" - " INNER JOIN access_tokens on users.name = access_tokens.user_id" - " WHERE token = ?" - ) + sql = """ + SELECT users.name, + users.is_guest, + users.shadow_banned, + access_tokens.id as token_id, + access_tokens.device_id, + access_tokens.valid_until_ms + FROM users + INNER JOIN access_tokens on users.name = access_tokens.user_id + WHERE token = ? + """ txn.execute(sql, (token,)) rows = self.db_pool.cursor_to_dict(txn) @@ -803,7 +811,7 @@ class RegistrationWorkerStore(SQLBaseStore): await self.db_pool.runInteraction( "cull_expired_threepid_validation_tokens", cull_expired_threepid_validation_tokens_txn, - self.clock.time_msec(), + self._clock.time_msec(), ) @wrap_as_background_process("account_validity_set_expiration_dates") @@ -890,10 +898,10 @@ class RegistrationWorkerStore(SQLBaseStore): class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) - self.clock = hs.get_clock() + self._clock = hs.get_clock() self.config = hs.config self.db_pool.updates.register_background_index_update( @@ -1016,13 +1024,56 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): return 1 + async def set_user_deactivated_status( + self, user_id: str, deactivated: bool + ) -> None: + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id: The ID of the user to set the status for. + deactivated: The value to set for `deactivated`. + """ + + await self.db_pool.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, + deactivated, + ) + + def set_user_deactivated_status_txn(self, txn, user_id: str, deactivated: bool): + self.db_pool.simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) + txn.call_after(self.is_guest.invalidate, (user_id,)) + + @cached() + async def is_guest(self, user_id: str) -> bool: + res = await self.db_pool.simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="is_guest", + allow_none=True, + desc="is_guest", + ) + + return res if res else False + -class RegistrationStore(RegistrationBackgroundUpdateStore): - def __init__(self, database: DatabasePool, db_conn, hs): +class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self._ignore_unknown_session_error = hs.config.request_token_inhibit_3pid_errors + self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") + async def add_access_token_to_user( self, user_id: str, @@ -1138,19 +1189,19 @@ class RegistrationStore(RegistrationBackgroundUpdateStore): def _register_user( self, txn, - user_id, - password_hash, - was_guest, - make_guest, - appservice_id, - create_profile_with_displayname, - admin, - user_type, - shadow_banned, + user_id: str, + password_hash: Optional[str], + was_guest: bool, + make_guest: bool, + appservice_id: Optional[str], + create_profile_with_displayname: Optional[str], + admin: bool, + user_type: Optional[str], + shadow_banned: bool, ): user_id_obj = UserID.from_string(user_id) - now = int(self.clock.time()) + now = int(self._clock.time()) try: if was_guest: @@ -1374,18 +1425,6 @@ class RegistrationStore(RegistrationBackgroundUpdateStore): await self.db_pool.runInteraction("delete_access_token", f) - @cached() - async def is_guest(self, user_id: str) -> bool: - res = await self.db_pool.simple_select_one_onecol( - table="users", - keyvalues={"name": user_id}, - retcol="is_guest", - allow_none=True, - desc="is_guest", - ) - - return res if res else False - async def add_user_pending_deactivation(self, user_id: str) -> None: """ Adds a user to the table of users who need to be parted from all the rooms they're @@ -1479,7 +1518,7 @@ class RegistrationStore(RegistrationBackgroundUpdateStore): txn, table="threepid_validation_session", keyvalues={"session_id": session_id}, - updatevalues={"validated_at": self.clock.time_msec()}, + updatevalues={"validated_at": self._clock.time_msec()}, ) return next_link @@ -1547,35 +1586,6 @@ class RegistrationStore(RegistrationBackgroundUpdateStore): start_or_continue_validation_session_txn, ) - async def set_user_deactivated_status( - self, user_id: str, deactivated: bool - ) -> None: - """Set the `deactivated` property for the provided user to the provided value. - - Args: - user_id: The ID of the user to set the status for. - deactivated: The value to set for `deactivated`. - """ - - await self.db_pool.runInteraction( - "set_user_deactivated_status", - self.set_user_deactivated_status_txn, - user_id, - deactivated, - ) - - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self.db_pool.simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - txn.call_after(self.is_guest.invalidate, (user_id,)) - def find_max_generated_user_id_localpart(cur: Cursor) -> int: """ -- cgit 1.5.1 From e8dbbcb64c2725933a5bcefeeeaff9efbd0c2261 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Mon, 26 Oct 2020 14:51:33 +0000 Subject: Fix get|set_type_stream_id_for_appservice store functions (#8648) --- changelog.d/8648.bugfix | 1 + synapse/handlers/appservice.py | 12 +++--- synapse/storage/databases/main/appservice.py | 29 ++++++++++---- tests/storage/test_appservice.py | 56 ++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 changelog.d/8648.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/8648.bugfix b/changelog.d/8648.bugfix new file mode 100644 index 0000000000..aa71ad0ff2 --- /dev/null +++ b/changelog.d/8648.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.22.0rc1 which would cause ephemeral events to not be sent to appservices. \ No newline at end of file diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 7826387e53..03e9ec4d4e 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -236,16 +236,16 @@ class ApplicationServicesHandler: events = await self._handle_receipts(service) if events: self.scheduler.submit_ephemeral_events_for_as(service, events) - await self.store.set_type_stream_id_for_appservice( - service, "read_receipt", new_token - ) + await self.store.set_type_stream_id_for_appservice( + service, "read_receipt", new_token + ) elif stream_key == "presence_key": events = await self._handle_presence(service, users) if events: self.scheduler.submit_ephemeral_events_for_as(service, events) - await self.store.set_type_stream_id_for_appservice( - service, "presence", new_token - ) + await self.store.set_type_stream_id_for_appservice( + service, "presence", new_token + ) async def _handle_typing(self, service: ApplicationService, new_token: int): typing_source = self.event_sources.sources["typing"] diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 43bf0f649a..637a938bac 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -369,17 +369,25 @@ class ApplicationServiceTransactionWorkerStore( async def get_type_stream_id_for_appservice( self, service: ApplicationService, type: str ) -> int: + if type not in ("read_receipt", "presence"): + raise ValueError( + "Expected type to be a valid application stream id type, got %s" + % (type,) + ) + def get_type_stream_id_for_appservice_txn(txn): stream_id_type = "%s_stream_id" % type txn.execute( - "SELECT ? FROM application_services_state WHERE as_id=?", - (stream_id_type, service.id,), + # We do NOT want to escape `stream_id_type`. + "SELECT %s FROM application_services_state WHERE as_id=?" + % stream_id_type, + (service.id,), ) - last_txn_id = txn.fetchone() - if last_txn_id is None or last_txn_id[0] is None: # no row exists + last_stream_id = txn.fetchone() + if last_stream_id is None or last_stream_id[0] is None: # no row exists return 0 else: - return int(last_txn_id[0]) + return int(last_stream_id[0]) return await self.db_pool.runInteraction( "get_type_stream_id_for_appservice", get_type_stream_id_for_appservice_txn @@ -388,11 +396,18 @@ class ApplicationServiceTransactionWorkerStore( async def set_type_stream_id_for_appservice( self, service: ApplicationService, type: str, pos: int ) -> None: + if type not in ("read_receipt", "presence"): + raise ValueError( + "Expected type to be a valid application stream id type, got %s" + % (type,) + ) + def set_type_stream_id_for_appservice_txn(txn): stream_id_type = "%s_stream_id" % type txn.execute( - "UPDATE ? SET device_list_stream_id = ? WHERE as_id=?", - (stream_id_type, pos, service.id), + "UPDATE application_services_state SET %s = ? WHERE as_id=?" + % stream_id_type, + (pos, service.id), ) await self.db_pool.runInteraction( diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index c5c7987349..1ce29af5fd 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -410,6 +410,62 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): ) +class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase): + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver() + return hs + + def prepare(self, hs, reactor, clock): + self.service = Mock(id="foo") + self.store = self.hs.get_datastore() + self.get_success(self.store.set_appservice_state(self.service, "up")) + + def test_get_type_stream_id_for_appservice_no_value(self): + value = self.get_success( + self.store.get_type_stream_id_for_appservice(self.service, "read_receipt") + ) + self.assertEquals(value, 0) + + value = self.get_success( + self.store.get_type_stream_id_for_appservice(self.service, "presence") + ) + self.assertEquals(value, 0) + + def test_get_type_stream_id_for_appservice_invalid_type(self): + self.get_failure( + self.store.get_type_stream_id_for_appservice(self.service, "foobar"), + ValueError, + ) + + def test_set_type_stream_id_for_appservice(self): + read_receipt_value = 1024 + self.get_success( + self.store.set_type_stream_id_for_appservice( + self.service, "read_receipt", read_receipt_value + ) + ) + result = self.get_success( + self.store.get_type_stream_id_for_appservice(self.service, "read_receipt") + ) + self.assertEqual(result, read_receipt_value) + + self.get_success( + self.store.set_type_stream_id_for_appservice( + self.service, "presence", read_receipt_value + ) + ) + result = self.get_success( + self.store.get_type_stream_id_for_appservice(self.service, "presence") + ) + self.assertEqual(result, read_receipt_value) + + def test_set_type_stream_id_for_appservice_invalid_type(self): + self.get_failure( + self.store.set_type_stream_id_for_appservice(self.service, "foobar", 1024), + ValueError, + ) + + # required for ApplicationServiceTransactionStoreTestCase tests class TestTransactionStore(ApplicationServiceTransactionStore, ApplicationServiceStore): def __init__(self, database: DatabasePool, db_conn, hs): -- cgit 1.5.1 From 49d72dea2a6804e4795fb9e1cbc1f1bb1354f08f Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 26 Oct 2020 18:02:28 +0100 Subject: Add an admin api to delete local media. (#8519) Related to: #6459, #3479 Add `DELETE /_synapse/admin/v1/media//` to delete a single file from server. --- changelog.d/8519.feature | 1 + docs/admin_api/media_admin_api.md | 79 +++ synapse/rest/admin/media.py | 81 ++- synapse/rest/media/v1/filepath.py | 17 + synapse/rest/media/v1/media_repository.py | 72 ++- synapse/storage/databases/main/media_repository.py | 53 ++ tests/rest/admin/test_media.py | 568 +++++++++++++++++++++ 7 files changed, 868 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8519.feature create mode 100644 tests/rest/admin/test_media.py (limited to 'synapse/storage') diff --git a/changelog.d/8519.feature b/changelog.d/8519.feature new file mode 100644 index 0000000000..e2ab548681 --- /dev/null +++ b/changelog.d/8519.feature @@ -0,0 +1 @@ +Add an admin api to delete a single file or files were not used for a defined time from server. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 26948770d8..3994e1f1a9 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -100,3 +100,82 @@ Response: "num_quarantined": 10 # The number of media items successfully quarantined } ``` + +# Delete local media +This API deletes the *local* media from the disk of your own server. +This includes any local thumbnails and copies of media downloaded from +remote homeservers. +This API will not affect media that has been uploaded to external +media repositories (e.g https://github.com/turt2live/matrix-media-repo/). +See also [purge_remote_media.rst](purge_remote_media.rst). + +## Delete a specific local media +Delete a specific `media_id`. + +Request: + +``` +DELETE /_synapse/admin/v1/media// + +{} +``` + +URL Parameters + +* `server_name`: string - The name of your local server (e.g `matrix.org`) +* `media_id`: string - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`) + +Response: + +```json + { + "deleted_media": [ + "abcdefghijklmnopqrstuvwx" + ], + "total": 1 + } +``` + +The following fields are returned in the JSON response body: + +* `deleted_media`: an array of strings - List of deleted `media_id` +* `total`: integer - Total number of deleted `media_id` + +## Delete local media by date or size + +Request: + +``` +POST /_synapse/admin/v1/media//delete?before_ts= + +{} +``` + +URL Parameters + +* `server_name`: string - The name of your local server (e.g `matrix.org`). +* `before_ts`: string representing a positive integer - Unix timestamp in ms. +Files that were last used before this timestamp will be deleted. It is the timestamp of +last access and not the timestamp creation. +* `size_gt`: Optional - string representing a positive integer - Size of the media in bytes. +Files that are larger will be deleted. Defaults to `0`. +* `keep_profiles`: Optional - string representing a boolean - Switch to also delete files +that are still used in image data (e.g user profile, room avatar). +If `false` these files will be deleted. Defaults to `true`. + +Response: + +```json + { + "deleted_media": [ + "abcdefghijklmnopqrstuvwx", + "abcdefghijklmnopqrstuvwz" + ], + "total": 2 + } +``` + +The following fields are returned in the JSON response body: + +* `deleted_media`: an array of strings - List of deleted `media_id` +* `total`: integer - Total number of deleted `media_id` diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index ee75095c0e..ba50cb876d 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -16,9 +16,10 @@ import logging -from synapse.api.errors import AuthError -from synapse.http.servlet import RestServlet, parse_integer +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError +from synapse.http.servlet import RestServlet, parse_boolean, parse_integer from synapse.rest.admin._base import ( + admin_patterns, assert_requester_is_admin, assert_user_is_admin, historical_admin_path_patterns, @@ -150,6 +151,80 @@ class PurgeMediaCacheRestServlet(RestServlet): return 200, ret +class DeleteMediaByID(RestServlet): + """Delete local media by a given ID. Removes it from this server. + """ + + PATTERNS = admin_patterns("/media/(?P[^/]+)/(?P[^/]+)") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.server_name = hs.hostname + self.media_repository = hs.get_media_repository() + + async def on_DELETE(self, request, server_name: str, media_id: str): + await assert_requester_is_admin(self.auth, request) + + if self.server_name != server_name: + raise SynapseError(400, "Can only delete local media") + + if await self.store.get_local_media(media_id) is None: + raise NotFoundError("Unknown media") + + logging.info("Deleting local media by ID: %s", media_id) + + deleted_media, total = await self.media_repository.delete_local_media(media_id) + return 200, {"deleted_media": deleted_media, "total": total} + + +class DeleteMediaByDateSize(RestServlet): + """Delete local media and local copies of remote media by + timestamp and size. + """ + + PATTERNS = admin_patterns("/media/(?P[^/]+)/delete") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.server_name = hs.hostname + self.media_repository = hs.get_media_repository() + + async def on_POST(self, request, server_name: str): + await assert_requester_is_admin(self.auth, request) + + before_ts = parse_integer(request, "before_ts", required=True) + size_gt = parse_integer(request, "size_gt", default=0) + keep_profiles = parse_boolean(request, "keep_profiles", default=True) + + if before_ts < 0: + raise SynapseError( + 400, + "Query parameter before_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + if size_gt < 0: + raise SynapseError( + 400, + "Query parameter size_gt must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if self.server_name != server_name: + raise SynapseError(400, "Can only delete local media") + + logging.info( + "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s" + % (before_ts, size_gt, keep_profiles) + ) + + deleted_media, total = await self.media_repository.delete_old_local_media( + before_ts, size_gt, keep_profiles + ) + return 200, {"deleted_media": deleted_media, "total": total} + + def register_servlets_for_media_repo(hs, http_server): """ Media repo specific APIs. @@ -159,3 +234,5 @@ def register_servlets_for_media_repo(hs, http_server): QuarantineMediaByID(hs).register(http_server) QuarantineMediaByUser(hs).register(http_server) ListMediaInRoom(hs).register(http_server) + DeleteMediaByID(hs).register(http_server) + DeleteMediaByDateSize(hs).register(http_server) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 7447eeaebe..9e079f672f 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -69,6 +69,23 @@ class MediaFilePaths: local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + def local_media_thumbnail_dir(self, media_id: str) -> str: + """ + Retrieve the local store path of thumbnails of a given media_id + + Args: + media_id: The media ID to query. + Returns: + Path of local_thumbnails from media_id + """ + return os.path.join( + self.base_path, + "local_thumbnails", + media_id[0:2], + media_id[2:4], + media_id[4:], + ) + def remote_media_filepath_rel(self, server_name, file_id): return os.path.join( "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e1192b47cd..5cce7237a0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -18,7 +18,7 @@ import errno import logging import os import shutil -from typing import IO, Dict, Optional, Tuple +from typing import IO, Dict, List, Optional, Tuple import twisted.internet.error import twisted.web.http @@ -767,6 +767,76 @@ class MediaRepository: return {"deleted": deleted} + async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: + """ + Delete the given local or remote media ID from this server + + Args: + media_id: The media ID to delete. + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + return await self._remove_local_media_from_disk([media_id]) + + async def delete_old_local_media( + self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True, + ) -> Tuple[List[str], int]: + """ + Delete local or remote media from this server by size and timestamp. Removes + media files, any thumbnails and cached URLs. + + Args: + before_ts: Unix timestamp in ms. + Files that were last used before this timestamp will be deleted + size_gt: Size of the media in bytes. Files that are larger will be deleted + keep_profiles: Switch to delete also files that are still used in image data + (e.g user profile, room avatar) + If false these files will be deleted + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + old_media = await self.store.get_local_media_before( + before_ts, size_gt, keep_profiles, + ) + return await self._remove_local_media_from_disk(old_media) + + async def _remove_local_media_from_disk( + self, media_ids: List[str] + ) -> Tuple[List[str], int]: + """ + Delete local or remote media from this server. Removes media files, + any thumbnails and cached URLs. + + Args: + media_ids: List of media_id to delete + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + removed_media = [] + for media_id in media_ids: + logger.info("Deleting media with ID '%s'", media_id) + full_path = self.filepaths.local_media_filepath(media_id) + try: + os.remove(full_path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", full_path, e) + if e.errno == errno.ENOENT: + pass + else: + continue + + thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) + shutil.rmtree(thumbnail_dir, ignore_errors=True) + + await self.store.delete_remote_media(self.server_name, media_id) + + await self.store.delete_url_cache((media_id,)) + await self.store.delete_url_cache_media((media_id,)) + + removed_media.append(media_id) + + return removed_media, len(removed_media) + class MediaRepositoryResource(Resource): """File uploading and downloading. diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index cc538c5c10..7ef5f1bf2b 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self.server_name = hs.hostname async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]: """Get the metadata for a local piece of media @@ -115,6 +116,58 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="get_local_media", ) + async def get_local_media_before( + self, before_ts: int, size_gt: int, keep_profiles: bool, + ) -> Optional[List[str]]: + + # to find files that have never been accessed (last_access_ts IS NULL) + # compare with `created_ts` + sql = """ + SELECT media_id + FROM local_media_repository AS lmr + WHERE + ( last_access_ts < ? + OR ( created_ts < ? AND last_access_ts IS NULL ) ) + AND media_length > ? + """ + + if keep_profiles: + sql_keep = """ + AND ( + NOT EXISTS + (SELECT 1 + FROM profiles + WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM groups + WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM room_memberships + WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM user_directory + WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM room_stats_state + WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id) + ) + """.format( + media_prefix="mxc://%s/" % (self.server_name,), + ) + sql += sql_keep + + def _get_local_media_before_txn(txn): + txn.execute(sql, (before_ts, before_ts, size_gt)) + return [row[0] for row in txn] + + return await self.db_pool.runInteraction( + "get_local_media_before", _get_local_media_before_txn + ) + async def store_local_media( self, media_id, diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py new file mode 100644 index 0000000000..721fa1ed51 --- /dev/null +++ b/tests/rest/admin/test_media.py @@ -0,0 +1,568 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +from binascii import unhexlify + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client.v1 import login, profile, room +from synapse.rest.media.v1.filepath import MediaFilePaths + +from tests import unittest + + +class DeleteMediaByIDTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.admin.register_servlets_for_media_repo, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.handler = hs.get_device_handler() + self.media_repo = hs.get_media_repository_resource() + self.server_name = hs.hostname + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.filepaths = MediaFilePaths(hs.config.media_store_path) + + def test_no_auth(self): + """ + Try to delete media without authentication. + """ + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request("DELETE", url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error is returned. + """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.other_user_token, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_media_does_not_exist(self): + """ + Tests that a lookup for a media that does not exist returns a 404 + """ + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_media_is_not_local(self): + """ + Tests that a lookup for a media that is not a local returns a 400 + """ + url = "/_synapse/admin/v1/media/%s/%s" % ("unknown_domain", "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual("Can only delete local media", channel.json_body["error"]) + + def test_delete_media(self): + """ + Tests that delete a media is successfully + """ + + download_resource = self.media_repo.children[b"download"] + upload_resource = self.media_repo.children[b"upload"] + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + response = self.helper.upload_media( + upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + ) + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name, media_id = server_and_media_id.split("/") + + self.assertEqual(server_name, self.server_name) + + # Attempt to access media + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + + # Should be successful + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing media: %s" % server_and_media_id + ), + ) + + # Test if the file exists + local_path = self.filepaths.local_media_filepath(media_id) + self.assertTrue(os.path.exists(local_path)) + + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, media_id) + + # Delete media + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + media_id, channel.json_body["deleted_media"][0], + ) + + # Attempt to access media + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + self.assertEqual( + 404, + channel.code, + msg=( + "Expected to receive a 404 on accessing deleted media: %s" + % server_and_media_id + ), + ) + + # Test if the file is deleted + self.assertFalse(os.path.exists(local_path)) + + +class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.admin.register_servlets_for_media_repo, + login.register_servlets, + profile.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.handler = hs.get_device_handler() + self.media_repo = hs.get_media_repository_resource() + self.server_name = hs.hostname + self.clock = hs.clock + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name + + def test_no_auth(self): + """ + Try to delete media without authentication. + """ + + request, channel = self.make_request("POST", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error is returned. + """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + request, channel = self.make_request( + "POST", self.url, access_token=self.other_user_token, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_media_is_not_local(self): + """ + Tests that a lookup for media that is not local returns a 400 + """ + url = "/_synapse/admin/v1/media/%s/delete" % "unknown_domain" + + request, channel = self.make_request( + "POST", url + "?before_ts=1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual("Can only delete local media", channel.json_body["error"]) + + def test_missing_parameter(self): + """ + If the parameter `before_ts` is missing, an error is returned. + """ + request, channel = self.make_request( + "POST", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Missing integer query parameter b'before_ts'", channel.json_body["error"] + ) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + request, channel = self.make_request( + "POST", self.url + "?before_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter before_ts must be a string representing a positive integer.", + channel.json_body["error"], + ) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=1234&size_gt=-1234", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter size_gt must be a string representing a positive integer.", + channel.json_body["error"], + ) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=1234&keep_profiles=not_bool", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + self.assertEqual( + "Boolean query parameter b'keep_profiles' must be one of ['true', 'false']", + channel.json_body["error"], + ) + + def test_delete_media_never_accessed(self): + """ + Tests that media deleted if it is older than `before_ts` and never accessed + `last_access_ts` is `NULL` and `created_ts` < `before_ts` + """ + + # upload and do not access + server_and_media_id = self._create_media() + self.pump(1.0) + + # test that the file exists + media_id = server_and_media_id.split("/")[1] + local_path = self.filepaths.local_media_filepath(media_id) + self.assertTrue(os.path.exists(local_path)) + + # timestamp after upload/create + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + media_id, channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_date(self): + """ + Tests that media is not deleted if it is newer than `before_ts` + """ + + # timestamp before upload + now_ms = self.clock.time_msec() + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + # timestamp after upload + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_size(self): + """ + Tests that media is not deleted if its size is smaller than or equal + to `size_gt` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&size_gt=67", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&size_gt=66", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_user_avatar(self): + """ + Tests that we do not delete media if is used as a user avatar + Tests parameter `keep_profiles` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + # set media as avatar + request, channel = self.make_request( + "PUT", + "/profile/%s/avatar_url" % (self.admin_user,), + content=json.dumps({"avatar_url": "mxc://%s" % (server_and_media_id,)}), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_room_avatar(self): + """ + Tests that we do not delete media if it is used as a room avatar + Tests parameter `keep_profiles` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + # set media as room avatar + room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + request, channel = self.make_request( + "PUT", + "/rooms/%s/state/m.room.avatar" % (room_id,), + content=json.dumps({"url": "mxc://%s" % (server_and_media_id,)}), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def _create_media(self): + """ + Create a media and return media_id and server_and_media_id + """ + upload_resource = self.media_repo.children[b"upload"] + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + response = self.helper.upload_media( + upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + ) + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name = server_and_media_id.split("/")[0] + + # Check that new media is a local and not remote + self.assertEqual(server_name, self.server_name) + + return server_and_media_id + + def _access_media(self, server_and_media_id, expect_success=True): + """ + Try to access a media and check the result + """ + download_resource = self.media_repo.children[b"download"] + + media_id = server_and_media_id.split("/")[1] + local_path = self.filepaths.local_media_filepath(media_id) + + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + + if expect_success: + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing media: %s" + % server_and_media_id + ), + ) + # Test that the file exists + self.assertTrue(os.path.exists(local_path)) + else: + self.assertEqual( + 404, + channel.code, + msg=( + "Expected to receive a 404 on accessing deleted media: %s" + % (server_and_media_id) + ), + ) + # Test that the file is deleted + self.assertFalse(os.path.exists(local_path)) -- cgit 1.5.1 From 66e6801c3e6e8a0e8c6370079e72a4230e08b3eb Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 26 Oct 2020 19:16:37 +0100 Subject: Split admin API for reported events into a detail and a list view (#8539) Split admin API for reported events in detail und list view. API was introduced with #8217 in synapse v.1.21.0. It makes the list (`GET /_synapse/admin/v1/event_reports`) less complex and provides a better overview. The details can be queried with: `GET /_synapse/admin/v1/event_reports/`. It is similar to room and users API. It is a kind of regression in `GET /_synapse/admin/v1/event_reports`. `event_json` was removed. But the api was introduced one version before and it is an admin API (not under spec). Signed-off-by: Dirk Klimpel dirk@klimpel.org --- changelog.d/8539.feature | 1 + docs/admin_api/event_reports.rst | 144 +++++++++++++++--------- synapse/rest/admin/__init__.py | 6 +- synapse/rest/admin/event_reports.py | 46 +++++++- synapse/storage/databases/main/room.py | 104 ++++++++++++++--- tests/rest/admin/test_event_reports.py | 196 ++++++++++++++++++++++++++++++--- 6 files changed, 411 insertions(+), 86 deletions(-) create mode 100644 changelog.d/8539.feature (limited to 'synapse/storage') diff --git a/changelog.d/8539.feature b/changelog.d/8539.feature new file mode 100644 index 0000000000..15ce02fb86 --- /dev/null +++ b/changelog.d/8539.feature @@ -0,0 +1 @@ +Split admin API for reported events (`GET /_synapse/admin/v1/event_reports`) into detail and list endpoints. This is a breaking change to #8217 which was introduced in Synapse v1.21.0. Those who already use this API should check their scripts. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/event_reports.rst b/docs/admin_api/event_reports.rst index 461be01230..5f7b0fa6bb 100644 --- a/docs/admin_api/event_reports.rst +++ b/docs/admin_api/event_reports.rst @@ -17,67 +17,26 @@ It returns a JSON body like the following: { "event_reports": [ { - "content": { - "reason": "foo", - "score": -100 - }, "event_id": "$bNUFCwGzWca1meCGkjp-zwslF-GfVcXukvRLI1_FaVY", - "event_json": { - "auth_events": [ - "$YK4arsKKcc0LRoe700pS8DSjOvUT4NDv0HfInlMFw2M", - "$oggsNXxzPFRE3y53SUNd7nsj69-QzKv03a1RucHu-ws" - ], - "content": { - "body": "matrix.org: This Week in Matrix", - "format": "org.matrix.custom.html", - "formatted_body": "matrix.org:
This Week in Matrix", - "msgtype": "m.notice" - }, - "depth": 546, - "hashes": { - "sha256": "xK1//xnmvHJIOvbgXlkI8eEqdvoMmihVDJ9J4SNlsAw" - }, - "origin": "matrix.org", - "origin_server_ts": 1592291711430, - "prev_events": [ - "$YK4arsKKcc0LRoe700pS8DSjOvUT4NDv0HfInlMFw2M" - ], - "prev_state": [], - "room_id": "!ERAgBpSOcCCuTJqQPk:matrix.org", - "sender": "@foobar:matrix.org", - "signatures": { - "matrix.org": { - "ed25519:a_JaEG": "cs+OUKW/iHx5pEidbWxh0UiNNHwe46Ai9LwNz+Ah16aWDNszVIe2gaAcVZfvNsBhakQTew51tlKmL2kspXk/Dg" - } - }, - "type": "m.room.message", - "unsigned": { - "age_ts": 1592291711430, - } - }, "id": 2, "reason": "foo", + "score": -100, "received_ts": 1570897107409, - "room_alias": "#alias1:matrix.org", + "canonical_alias": "#alias1:matrix.org", "room_id": "!ERAgBpSOcCCuTJqQPk:matrix.org", + "name": "Matrix HQ", "sender": "@foobar:matrix.org", "user_id": "@foo:matrix.org" }, { - "content": { - "reason": "bar", - "score": -100 - }, "event_id": "$3IcdZsDaN_En-S1DF4EMCy3v4gNRKeOJs8W5qTOKj4I", - "event_json": { - // hidden items - // see above - }, "id": 3, "reason": "bar", + "score": -100, "received_ts": 1598889612059, - "room_alias": "#alias2:matrix.org", + "canonical_alias": "#alias2:matrix.org", "room_id": "!eGvUQuTCkHGVwNMOjv:matrix.org", + "name": "Your room name here", "sender": "@foobar:matrix.org", "user_id": "@bar:matrix.org" } @@ -113,17 +72,94 @@ The following fields are returned in the JSON response body: - ``id``: integer - ID of event report. - ``received_ts``: integer - The timestamp (in milliseconds since the unix epoch) when this report was sent. - ``room_id``: string - The ID of the room in which the event being reported is located. +- ``name``: string - The name of the room. - ``event_id``: string - The ID of the reported event. - ``user_id``: string - This is the user who reported the event and wrote the reason. - ``reason``: string - Comment made by the ``user_id`` in this report. May be blank. -- ``content``: object - Content of reported event. - - - ``reason``: string - Comment made by the ``user_id`` in this report. May be blank. - - ``score``: integer - Content is reported based upon a negative score, where -100 is "most offensive" and 0 is "inoffensive". - +- ``score``: integer - Content is reported based upon a negative score, where -100 is "most offensive" and 0 is "inoffensive". - ``sender``: string - This is the ID of the user who sent the original message/event that was reported. -- ``room_alias``: string - The alias of the room. ``null`` if the room does not have a canonical alias set. -- ``event_json``: object - Details of the original event that was reported. +- ``canonical_alias``: string - The canonical alias of the room. ``null`` if the room does not have a canonical alias set. - ``next_token``: integer - Indication for pagination. See above. - ``total``: integer - Total number of event reports related to the query (``user_id`` and ``room_id``). +Show details of a specific event report +======================================= + +This API returns information about a specific event report. + +The api is:: + + GET /_synapse/admin/v1/event_reports/ + +To use it, you will need to authenticate by providing an ``access_token`` for a +server admin: see `README.rst `_. + +It returns a JSON body like the following: + +.. code:: jsonc + + { + "event_id": "$bNUFCwGzWca1meCGkjp-zwslF-GfVcXukvRLI1_FaVY", + "event_json": { + "auth_events": [ + "$YK4arsKKcc0LRoe700pS8DSjOvUT4NDv0HfInlMFw2M", + "$oggsNXxzPFRE3y53SUNd7nsj69-QzKv03a1RucHu-ws" + ], + "content": { + "body": "matrix.org: This Week in Matrix", + "format": "org.matrix.custom.html", + "formatted_body": "matrix.org:
This Week in Matrix", + "msgtype": "m.notice" + }, + "depth": 546, + "hashes": { + "sha256": "xK1//xnmvHJIOvbgXlkI8eEqdvoMmihVDJ9J4SNlsAw" + }, + "origin": "matrix.org", + "origin_server_ts": 1592291711430, + "prev_events": [ + "$YK4arsKKcc0LRoe700pS8DSjOvUT4NDv0HfInlMFw2M" + ], + "prev_state": [], + "room_id": "!ERAgBpSOcCCuTJqQPk:matrix.org", + "sender": "@foobar:matrix.org", + "signatures": { + "matrix.org": { + "ed25519:a_JaEG": "cs+OUKW/iHx5pEidbWxh0UiNNHwe46Ai9LwNz+Ah16aWDNszVIe2gaAcVZfvNsBhakQTew51tlKmL2kspXk/Dg" + } + }, + "type": "m.room.message", + "unsigned": { + "age_ts": 1592291711430, + } + }, + "id": , + "reason": "foo", + "score": -100, + "received_ts": 1570897107409, + "canonical_alias": "#alias1:matrix.org", + "room_id": "!ERAgBpSOcCCuTJqQPk:matrix.org", + "name": "Matrix HQ", + "sender": "@foobar:matrix.org", + "user_id": "@foo:matrix.org" + } + +**URL parameters:** + +- ``report_id``: string - The ID of the event report. + +**Response** + +The following fields are returned in the JSON response body: + +- ``id``: integer - ID of event report. +- ``received_ts``: integer - The timestamp (in milliseconds since the unix epoch) when this report was sent. +- ``room_id``: string - The ID of the room in which the event being reported is located. +- ``name``: string - The name of the room. +- ``event_id``: string - The ID of the reported event. +- ``user_id``: string - This is the user who reported the event and wrote the reason. +- ``reason``: string - Comment made by the ``user_id`` in this report. May be blank. +- ``score``: integer - Content is reported based upon a negative score, where -100 is "most offensive" and 0 is "inoffensive". +- ``sender``: string - This is the ID of the user who sent the original message/event that was reported. +- ``canonical_alias``: string - The canonical alias of the room. ``null`` if the room does not have a canonical alias set. +- ``event_json``: object - Details of the original event that was reported. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 789431ef25..df14bdf26e 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -31,7 +31,10 @@ from synapse.rest.admin.devices import ( DeviceRestServlet, DevicesRestServlet, ) -from synapse.rest.admin.event_reports import EventReportsRestServlet +from synapse.rest.admin.event_reports import ( + EventReportDetailRestServlet, + EventReportsRestServlet, +) from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet @@ -222,6 +225,7 @@ def register_servlets(hs, http_server): DevicesRestServlet(hs).register(http_server) DeleteDevicesRestServlet(hs).register(http_server) EventReportsRestServlet(hs).register(http_server) + EventReportDetailRestServlet(hs).register(http_server) def register_servlets_for_client_rest_resource(hs, http_server): diff --git a/synapse/rest/admin/event_reports.py b/synapse/rest/admin/event_reports.py index 5b8d0594cd..fd482f0e32 100644 --- a/synapse/rest/admin/event_reports.py +++ b/synapse/rest/admin/event_reports.py @@ -15,7 +15,7 @@ import logging -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin @@ -86,3 +86,47 @@ class EventReportsRestServlet(RestServlet): ret["next_token"] = start + len(event_reports) return 200, ret + + +class EventReportDetailRestServlet(RestServlet): + """ + Get a specific reported event that is known to the homeserver. Results are returned + in a dictionary containing report information. + The requester must have administrator access in Synapse. + + GET /_synapse/admin/v1/event_reports/ + returns: + 200 OK with details report if success otherwise an error. + + Args: + The parameter `report_id` is the ID of the event report in the database. + Returns: + JSON blob of information about the event report + """ + + PATTERNS = admin_patterns("/event_reports/(?P[^/]*)$") + + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET(self, request, report_id): + await assert_requester_is_admin(self.auth, request) + + message = ( + "The report_id parameter must be a string representing a positive integer." + ) + try: + report_id = int(report_id) + except ValueError: + raise SynapseError(400, message, errcode=Codes.INVALID_PARAM) + + if report_id < 0: + raise SynapseError(400, message, errcode=Codes.INVALID_PARAM) + + ret = await self.store.get_event_report(report_id) + if not ret: + raise NotFoundError("Event report not found") + + return 200, ret diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index e83d961c20..dc0c4b5499 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1411,6 +1411,65 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): desc="add_event_report", ) + async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]: + """Retrieve an event report + + Args: + report_id: ID of reported event in database + Returns: + event_report: json list of information from event report + """ + + def _get_event_report_txn(txn, report_id): + + sql = """ + SELECT + er.id, + er.received_ts, + er.room_id, + er.event_id, + er.user_id, + er.content, + events.sender, + room_stats_state.canonical_alias, + room_stats_state.name, + event_json.json AS event_json + FROM event_reports AS er + LEFT JOIN events + ON events.event_id = er.event_id + JOIN event_json + ON event_json.event_id = er.event_id + JOIN room_stats_state + ON room_stats_state.room_id = er.room_id + WHERE er.id = ? + """ + + txn.execute(sql, [report_id]) + row = txn.fetchone() + + if not row: + return None + + event_report = { + "id": row[0], + "received_ts": row[1], + "room_id": row[2], + "event_id": row[3], + "user_id": row[4], + "score": db_to_json(row[5]).get("score"), + "reason": db_to_json(row[5]).get("reason"), + "sender": row[6], + "canonical_alias": row[7], + "name": row[8], + "event_json": db_to_json(row[9]), + } + + return event_report + + return await self.db_pool.runInteraction( + "get_event_report", _get_event_report_txn, report_id + ) + async def get_event_reports_paginate( self, start: int, @@ -1468,18 +1527,15 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): er.room_id, er.event_id, er.user_id, - er.reason, er.content, events.sender, - room_aliases.room_alias, - event_json.json AS event_json + room_stats_state.canonical_alias, + room_stats_state.name FROM event_reports AS er - LEFT JOIN room_aliases - ON room_aliases.room_id = er.room_id - JOIN events + LEFT JOIN events ON events.event_id = er.event_id - JOIN event_json - ON event_json.event_id = er.event_id + JOIN room_stats_state + ON room_stats_state.room_id = er.room_id {where_clause} ORDER BY er.received_ts {order} LIMIT ? @@ -1490,15 +1546,29 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): args += [limit, start] txn.execute(sql, args) - event_reports = self.db_pool.cursor_to_dict(txn) - - if count > 0: - for row in event_reports: - try: - row["content"] = db_to_json(row["content"]) - row["event_json"] = db_to_json(row["event_json"]) - except Exception: - continue + + event_reports = [] + for row in txn: + try: + s = db_to_json(row[5]).get("score") + r = db_to_json(row[5]).get("reason") + except Exception: + logger.error("Unable to parse json from event_reports: %s", row[0]) + continue + event_reports.append( + { + "id": row[0], + "received_ts": row[1], + "room_id": row[2], + "event_id": row[3], + "user_id": row[4], + "score": s, + "reason": r, + "sender": row[6], + "canonical_alias": row[7], + "name": row[8], + } + ) return event_reports, count diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py index bf79086f78..303622217f 100644 --- a/tests/rest/admin/test_event_reports.py +++ b/tests/rest/admin/test_event_reports.py @@ -70,6 +70,16 @@ class EventReportsTestCase(unittest.HomeserverTestCase): self.url = "/_synapse/admin/v1/event_reports" + def test_no_auth(self): + """ + Try to get an event report without authentication. + """ + request, channel = self.make_request("GET", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + def test_requester_is_no_admin(self): """ If the user is not a server admin, an error 403 is returned. @@ -266,7 +276,7 @@ class EventReportsTestCase(unittest.HomeserverTestCase): def test_limit_is_negative(self): """ - Testing that a negative list parameter returns a 400 + Testing that a negative limit parameter returns a 400 """ request, channel = self.make_request( @@ -360,7 +370,7 @@ class EventReportsTestCase(unittest.HomeserverTestCase): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) def _check_fields(self, content): - """Checks that all attributes are present in a event report + """Checks that all attributes are present in an event report """ for c in content: self.assertIn("id", c) @@ -368,15 +378,175 @@ class EventReportsTestCase(unittest.HomeserverTestCase): self.assertIn("room_id", c) self.assertIn("event_id", c) self.assertIn("user_id", c) - self.assertIn("reason", c) - self.assertIn("content", c) self.assertIn("sender", c) - self.assertIn("room_alias", c) - self.assertIn("event_json", c) - self.assertIn("score", c["content"]) - self.assertIn("reason", c["content"]) - self.assertIn("auth_events", c["event_json"]) - self.assertIn("type", c["event_json"]) - self.assertIn("room_id", c["event_json"]) - self.assertIn("sender", c["event_json"]) - self.assertIn("content", c["event_json"]) + self.assertIn("canonical_alias", c) + self.assertIn("name", c) + self.assertIn("score", c) + self.assertIn("reason", c) + + +class EventReportDetailTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + room.register_servlets, + report_event.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + self.room_id1 = self.helper.create_room_as( + self.other_user, tok=self.other_user_tok, is_public=True + ) + self.helper.join(self.room_id1, user=self.admin_user, tok=self.admin_user_tok) + + self._create_event_and_report( + room_id=self.room_id1, user_tok=self.other_user_tok, + ) + + # first created event report gets `id`=2 + self.url = "/_synapse/admin/v1/event_reports/2" + + def test_no_auth(self): + """ + Try to get event report without authentication. + """ + request, channel = self.make_request("GET", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error 403 is returned. + """ + + request, channel = self.make_request( + "GET", self.url, access_token=self.other_user_tok, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_default_success(self): + """ + Testing get a reported event + """ + + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self._check_fields(channel.json_body) + + def test_invalid_report_id(self): + """ + Testing that an invalid `report_id` returns a 400. + """ + + # `report_id` is negative + request, channel = self.make_request( + "GET", + "/_synapse/admin/v1/event_reports/-123", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # `report_id` is a non-numerical string + request, channel = self.make_request( + "GET", + "/_synapse/admin/v1/event_reports/abcdef", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + # `report_id` is undefined + request, channel = self.make_request( + "GET", + "/_synapse/admin/v1/event_reports/", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "The report_id parameter must be a string representing a positive integer.", + channel.json_body["error"], + ) + + def test_report_id_not_found(self): + """ + Testing that a not existing `report_id` returns a 404. + """ + + request, channel = self.make_request( + "GET", + "/_synapse/admin/v1/event_reports/123", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(404, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + self.assertEqual("Event report not found", channel.json_body["error"]) + + def _create_event_and_report(self, room_id, user_tok): + """Create and report events + """ + resp = self.helper.send(room_id, tok=user_tok) + event_id = resp["event_id"] + + request, channel = self.make_request( + "POST", + "rooms/%s/report/%s" % (room_id, event_id), + json.dumps({"score": -100, "reason": "this makes me sad"}), + access_token=user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + def _check_fields(self, content): + """Checks that all attributes are present in a event report + """ + self.assertIn("id", content) + self.assertIn("received_ts", content) + self.assertIn("room_id", content) + self.assertIn("event_id", content) + self.assertIn("user_id", content) + self.assertIn("sender", content) + self.assertIn("canonical_alias", content) + self.assertIn("name", content) + self.assertIn("event_json", content) + self.assertIn("score", content) + self.assertIn("reason", content) + self.assertIn("auth_events", content["event_json"]) + self.assertIn("type", content["event_json"]) + self.assertIn("room_id", content["event_json"]) + self.assertIn("sender", content["event_json"]) + self.assertIn("content", content["event_json"]) -- cgit 1.5.1 From 10f45d85bb355c66110b9221b0aa09010d2d03ad Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 14:17:31 -0400 Subject: Add type hints for account validity handler (#8620) This also fixes a bug by fixing handling of an account which doesn't expire. --- changelog.d/8620.bugfix | 1 + mypy.ini | 1 + synapse/handlers/account_validity.py | 29 ++++++++++++++++++++------ synapse/handlers/profile.py | 4 ++-- synapse/storage/databases/main/profile.py | 4 ++-- synapse/storage/databases/main/registration.py | 4 ++-- 6 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 changelog.d/8620.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/8620.bugfix b/changelog.d/8620.bugfix new file mode 100644 index 0000000000..c1078a3fb5 --- /dev/null +++ b/changelog.d/8620.bugfix @@ -0,0 +1 @@ +Fix a bug where the account validity endpoint would silently fail if the user ID did not have an expiration time. It now returns a 400 error. diff --git a/mypy.ini b/mypy.ini index 59d9074c3b..1fbd8decf8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -17,6 +17,7 @@ files = synapse/federation, synapse/handlers/_base.py, synapse/handlers/account_data.py, + synapse/handlers/account_validity.py, synapse/handlers/appservice.py, synapse/handlers/auth.py, synapse/handlers/cas_handler.py, diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index fd4f762f33..664d09da1c 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -18,19 +18,22 @@ import email.utils import logging from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText -from typing import List +from typing import TYPE_CHECKING, List -from synapse.api.errors import StoreError +from synapse.api.errors import StoreError, SynapseError from synapse.logging.context import make_deferred_yieldable from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.types import UserID from synapse.util import stringutils +if TYPE_CHECKING: + from synapse.app.homeserver import HomeServer + logger = logging.getLogger(__name__) class AccountValidityHandler: - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.hs = hs self.config = hs.config self.store = self.hs.get_datastore() @@ -67,7 +70,7 @@ class AccountValidityHandler: self.clock.looping_call(self._send_renewal_emails, 30 * 60 * 1000) @wrap_as_background_process("send_renewals") - async def _send_renewal_emails(self): + async def _send_renewal_emails(self) -> None: """Gets the list of users whose account is expiring in the amount of time configured in the ``renew_at`` parameter from the ``account_validity`` configuration, and sends renewal emails to all of these users as long as they @@ -81,11 +84,25 @@ class AccountValidityHandler: user_id=user["user_id"], expiration_ts=user["expiration_ts_ms"] ) - async def send_renewal_email_to_user(self, user_id: str): + async def send_renewal_email_to_user(self, user_id: str) -> None: + """ + Send a renewal email for a specific user. + + Args: + user_id: The user ID to send a renewal email for. + + Raises: + SynapseError if the user is not set to renew. + """ expiration_ts = await self.store.get_expiration_ts_for_user(user_id) + + # If this user isn't set to be expired, raise an error. + if expiration_ts is None: + raise SynapseError(400, "User has no expiration time: %s" % (user_id,)) + await self._send_renewal_email(user_id, expiration_ts) - async def _send_renewal_email(self, user_id: str, expiration_ts: int): + async def _send_renewal_email(self, user_id: str, expiration_ts: int) -> None: """Sends out a renewal email to every email address attached to the given user with a unique link allowing them to renew their account. diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 3875e53c08..14348faaf3 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -131,7 +131,7 @@ class ProfileHandler(BaseHandler): profile = await self.store.get_from_remote_profile_cache(user_id) return profile or {} - async def get_displayname(self, target_user: UserID) -> str: + async def get_displayname(self, target_user: UserID) -> Optional[str]: if self.hs.is_mine(target_user): try: displayname = await self.store.get_profile_displayname( @@ -218,7 +218,7 @@ class ProfileHandler(BaseHandler): await self._update_join_states(requester, target_user) - async def get_avatar_url(self, target_user: UserID) -> str: + async def get_avatar_url(self, target_user: UserID) -> Optional[str]: if self.hs.is_mine(target_user): try: avatar_url = await self.store.get_profile_avatar_url( diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index a6d1eb908a..0e25ca3d7a 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -39,7 +39,7 @@ class ProfileWorkerStore(SQLBaseStore): avatar_url=profile["avatar_url"], display_name=profile["displayname"] ) - async def get_profile_displayname(self, user_localpart: str) -> str: + async def get_profile_displayname(self, user_localpart: str) -> Optional[str]: return await self.db_pool.simple_select_one_onecol( table="profiles", keyvalues={"user_id": user_localpart}, @@ -47,7 +47,7 @@ class ProfileWorkerStore(SQLBaseStore): desc="get_profile_displayname", ) - async def get_profile_avatar_url(self, user_localpart: str) -> str: + async def get_profile_avatar_url(self, user_localpart: str) -> Optional[str]: return await self.db_pool.simple_select_one_onecol( table="profiles", keyvalues={"user_id": user_localpart}, diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index b0329e17ec..e7b17a7385 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -240,13 +240,13 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): desc="get_renewal_token_for_user", ) - async def get_users_expiring_soon(self) -> List[Dict[str, int]]: + async def get_users_expiring_soon(self) -> List[Dict[str, Any]]: """Selects users whose account will expire in the [now, now + renew_at] time window (see configuration for account_validity for information on what renew_at refers to). Returns: - A list of dictionaries mapping user ID to expiration time (in milliseconds). + A list of dictionaries, each with a user ID and expiration time (in milliseconds). """ def select_users_txn(txn, now_ms, renew_at): -- cgit 1.5.1 From 9b7c28283abf62378cecbde3523b5709448e4140 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 27 Oct 2020 15:12:31 +0100 Subject: Add admin API to list users' local media (#8647) Add admin API `GET /_synapse/admin/v1/users//media` to get information of users' uploaded files. --- changelog.d/8647.feature | 1 + docs/admin_api/user_admin_api.rst | 83 ++++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/users.py | 67 ++++- .../storage/databases/main/events_bg_updates.py | 7 + synapse/storage/databases/main/media_repository.py | 51 ++++ .../schema/delta/58/22users_have_local_media.sql | 2 + tests/rest/admin/test_user.py | 282 +++++++++++++++++++++ 8 files changed, 494 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8647.feature create mode 100644 synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql (limited to 'synapse/storage') diff --git a/changelog.d/8647.feature b/changelog.d/8647.feature new file mode 100644 index 0000000000..79e98f6e90 --- /dev/null +++ b/changelog.d/8647.feature @@ -0,0 +1 @@ +Add an admin API `GET /_synapse/admin/v1/users//media` to get information about uploaded media. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 0f3d99c826..636fc284e4 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -341,6 +341,89 @@ The following fields are returned in the JSON response body: - ``total`` - Number of rooms. +List media of an user +================================ +Gets a list of all local media that a specific ``user_id`` has created. +The response is ordered by creation date descending and media ID descending. +The newest media is on top. + +The API is:: + + GET /_synapse/admin/v1/users//media + +To use it, you will need to authenticate by providing an ``access_token`` for a +server admin: see `README.rst `_. + +A response body like the following is returned: + +.. code:: json + + { + "media": [ + { + "created_ts": 100400, + "last_access_ts": null, + "media_id": "qXhyRzulkwLsNHTbpHreuEgo", + "media_length": 67, + "media_type": "image/png", + "quarantined_by": null, + "safe_from_quarantine": false, + "upload_name": "test1.png" + }, + { + "created_ts": 200400, + "last_access_ts": null, + "media_id": "FHfiSnzoINDatrXHQIXBtahw", + "media_length": 67, + "media_type": "image/png", + "quarantined_by": null, + "safe_from_quarantine": false, + "upload_name": "test2.png" + } + ], + "next_token": 3, + "total": 2 + } + +To paginate, check for ``next_token`` and if present, call the endpoint again +with ``from`` set to the value of ``next_token``. This will return a new page. + +If the endpoint does not return a ``next_token`` then there are no more +reports to paginate through. + +**Parameters** + +The following parameters should be set in the URL: + +- ``user_id`` - string - fully qualified: for example, ``@user:server.com``. +- ``limit``: string representing a positive integer - Is optional but is used for pagination, + denoting the maximum number of items to return in this call. Defaults to ``100``. +- ``from``: string representing a positive integer - Is optional but used for pagination, + denoting the offset in the returned results. This should be treated as an opaque value and + not explicitly set to anything other than the return value of ``next_token`` from a previous call. + Defaults to ``0``. + +**Response** + +The following fields are returned in the JSON response body: + +- ``media`` - An array of objects, each containing information about a media. + Media objects contain the following fields: + + - ``created_ts`` - integer - Timestamp when the content was uploaded in ms. + - ``last_access_ts`` - integer - Timestamp when the content was last accessed in ms. + - ``media_id`` - string - The id used to refer to the media. + - ``media_length`` - integer - Length of the media in bytes. + - ``media_type`` - string - The MIME-type of the media. + - ``quarantined_by`` - string - The user ID that initiated the quarantine request + for this media. + + - ``safe_from_quarantine`` - bool - Status if this media is safe from quarantining. + - ``upload_name`` - string - The name the media was uploaded with. + +- ``next_token``: integer - Indication for pagination. See above. +- ``total`` - integer - Total number of media. + User devices ============ diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index df14bdf26e..a79996cae1 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -53,6 +53,7 @@ from synapse.rest.admin.users import ( ResetPasswordRestServlet, SearchUsersRestServlet, UserAdminServlet, + UserMediaRestServlet, UserMembershipRestServlet, UserRegisterServlet, UserRestServletV2, @@ -218,6 +219,7 @@ def register_servlets(hs, http_server): SendServerNoticeServlet(hs).register(http_server) VersionServlet(hs).register(http_server) UserAdminServlet(hs).register(http_server) + UserMediaRestServlet(hs).register(http_server) UserMembershipRestServlet(hs).register(http_server) UserRestServletV2(hs).register(http_server) UsersRestServletV2(hs).register(http_server) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index e71d9b0e1c..933bb45346 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -16,6 +16,7 @@ import hashlib import hmac import logging from http import HTTPStatus +from typing import Tuple from synapse.api.constants import UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -27,13 +28,14 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.admin._base import ( admin_patterns, assert_requester_is_admin, assert_user_is_admin, historical_admin_path_patterns, ) -from synapse.types import UserID +from synapse.types import JsonDict, UserID logger = logging.getLogger(__name__) @@ -709,3 +711,66 @@ class UserMembershipRestServlet(RestServlet): room_ids = await self.store.get_rooms_for_user(user_id) ret = {"joined_rooms": list(room_ids), "total": len(room_ids)} return 200, ret + + +class UserMediaRestServlet(RestServlet): + """ + Gets information about all uploaded local media for a specific `user_id`. + + Example: + http://localhost:8008/_synapse/admin/v1/users/ + @user:server/media + + Args: + The parameters `from` and `limit` are required for pagination. + By default, a `limit` of 100 is used. + Returns: + A list of media and an integer representing the total number of + media that exist given for this user + """ + + PATTERNS = admin_patterns("/users/(?P[^/]+)/media$") + + def __init__(self, hs): + self.is_mine = hs.is_mine + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + if not self.is_mine(UserID.from_string(user_id)): + raise SynapseError(400, "Can only lookup local users") + + user = await self.store.get_user_by_id(user_id) + if user is None: + raise NotFoundError("Unknown user") + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + 400, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + 400, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + media, total = await self.store.get_local_media_by_user_paginate( + start, limit, user_id + ) + + ret = {"media": media, "total": total} + if (start + limit) < total: + ret["next_token"] = start + len(media) + + return 200, ret diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 5e4af2eb51..97b6754846 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -92,6 +92,13 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): where_clause="NOT have_censored", ) + self.db_pool.updates.register_background_index_update( + "users_have_local_media", + index_name="users_have_local_media", + table="local_media_repository", + columns=["user_id", "created_ts"], + ) + async def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 7ef5f1bf2b..daf57675d8 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -116,6 +116,57 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="get_local_media", ) + async def get_local_media_by_user_paginate( + self, start: int, limit: int, user_id: str + ) -> Tuple[List[Dict[str, Any]], int]: + """Get a paginated list of metadata for a local piece of media + which an user_id has uploaded + + Args: + start: offset in the list + limit: maximum amount of media_ids to retrieve + user_id: fully-qualified user id + Returns: + A paginated list of all metadata of user's media, + plus the total count of all the user's media + """ + + def get_local_media_by_user_paginate_txn(txn): + + args = [user_id] + sql = """ + SELECT COUNT(*) as total_media + FROM local_media_repository + WHERE user_id = ? + """ + txn.execute(sql, args) + count = txn.fetchone()[0] + + sql = """ + SELECT + "media_id", + "media_type", + "media_length", + "upload_name", + "created_ts", + "last_access_ts", + "quarantined_by", + "safe_from_quarantine" + FROM local_media_repository + WHERE user_id = ? + ORDER BY created_ts DESC, media_id DESC + LIMIT ? OFFSET ? + """ + + args += [limit, start] + txn.execute(sql, args) + media = self.db_pool.cursor_to_dict(txn) + return media, count + + return await self.db_pool.runInteraction( + "get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn + ) + async def get_local_media_before( self, before_ts: int, size_gt: int, keep_profiles: bool, ) -> Optional[List[str]]: diff --git a/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql b/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql new file mode 100644 index 0000000000..a2842687f1 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql @@ -0,0 +1,2 @@ +INSERT INTO background_updates (update_name, progress_json) VALUES + ('users_have_local_media', '{}'); \ No newline at end of file diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index d4b7ae21d1..e815b92329 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -17,6 +17,7 @@ import hashlib import hmac import json import urllib.parse +from binascii import unhexlify from mock import Mock @@ -1115,3 +1116,284 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase): self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(number_rooms, channel.json_body["total"]) self.assertEqual(number_rooms, len(channel.json_body["joined_rooms"])) + + +class UserMediaRestTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.media_repo = hs.get_media_repository_resource() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.url = "/_synapse/admin/v1/users/%s/media" % urllib.parse.quote( + self.other_user + ) + + def test_no_auth(self): + """ + Try to list media of an user without authentication. + """ + request, channel = self.make_request("GET", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error is returned. + """ + other_user_token = self.login("user", "pass") + + request, channel = self.make_request( + "GET", self.url, access_token=other_user_token, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_user_does_not_exist(self): + """ + Tests that a lookup for a user that does not exist returns a 404 + """ + url = "/_synapse/admin/v1/users/@unknown_person:test/media" + request, channel = self.make_request( + "GET", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_user_is_not_local(self): + """ + Tests that a lookup for a user that is not a local returns a 400 + """ + url = "/_synapse/admin/v1/users/@unknown_person:unknown_domain/media" + + request, channel = self.make_request( + "GET", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual("Can only lookup local users", channel.json_body["error"]) + + def test_limit(self): + """ + Testing list of media with limit + """ + + number_media = 20 + other_user_tok = self.login("user", "pass") + self._create_media(other_user_tok, number_media) + + request, channel = self.make_request( + "GET", self.url + "?limit=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), 5) + self.assertEqual(channel.json_body["next_token"], 5) + self._check_fields(channel.json_body["media"]) + + def test_from(self): + """ + Testing list of media with a defined starting point (from) + """ + + number_media = 20 + other_user_tok = self.login("user", "pass") + self._create_media(other_user_tok, number_media) + + request, channel = self.make_request( + "GET", self.url + "?from=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), 15) + self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["media"]) + + def test_limit_and_from(self): + """ + Testing list of media with a defined starting point and limit + """ + + number_media = 20 + other_user_tok = self.login("user", "pass") + self._create_media(other_user_tok, number_media) + + request, channel = self.make_request( + "GET", self.url + "?from=5&limit=10", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(channel.json_body["next_token"], 15) + self.assertEqual(len(channel.json_body["media"]), 10) + self._check_fields(channel.json_body["media"]) + + def test_limit_is_negative(self): + """ + Testing that a negative limit parameter returns a 400 + """ + + request, channel = self.make_request( + "GET", self.url + "?limit=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + def test_from_is_negative(self): + """ + Testing that a negative from parameter returns a 400 + """ + + request, channel = self.make_request( + "GET", self.url + "?from=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + def test_next_token(self): + """ + Testing that `next_token` appears at the right place + """ + + number_media = 20 + other_user_tok = self.login("user", "pass") + self._create_media(other_user_tok, number_media) + + # `next_token` does not appear + # Number of results is the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=20", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), number_media) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does not appear + # Number of max results is larger than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=21", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), number_media) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does appear + # Number of max results is smaller than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), 19) + self.assertEqual(channel.json_body["next_token"], 19) + + # Check + # Set `from` to value of `next_token` for request remaining entries + # `next_token` does not appear + request, channel = self.make_request( + "GET", self.url + "?from=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_media) + self.assertEqual(len(channel.json_body["media"]), 1) + self.assertNotIn("next_token", channel.json_body) + + def test_user_has_no_media(self): + """ + Tests that a normal lookup for media is successfully + if user has no media created + """ + + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + self.assertEqual(0, len(channel.json_body["media"])) + + def test_get_media(self): + """ + Tests that a normal lookup for media is successfully + """ + + number_media = 5 + other_user_tok = self.login("user", "pass") + self._create_media(other_user_tok, number_media) + + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(number_media, channel.json_body["total"]) + self.assertEqual(number_media, len(channel.json_body["media"])) + self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["media"]) + + def _create_media(self, user_token, number_media): + """ + Create a number of media for a specific user + """ + upload_resource = self.media_repo.children[b"upload"] + for i in range(number_media): + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + self.helper.upload_media( + upload_resource, image_data, tok=user_token, expect_code=200 + ) + + def _check_fields(self, content): + """Checks that all attributes are present in content + """ + for m in content: + self.assertIn("media_id", m) + self.assertIn("media_type", m) + self.assertIn("media_length", m) + self.assertIn("upload_name", m) + self.assertIn("created_ts", m) + self.assertIn("last_access_ts", m) + self.assertIn("quarantined_by", m) + self.assertIn("safe_from_quarantine", m) -- cgit 1.5.1