diff options
author | Brendan Abolivier <babolivier@matrix.org> | 2022-02-08 13:25:54 +0000 |
---|---|---|
committer | Brendan Abolivier <babolivier@matrix.org> | 2022-02-08 13:25:54 +0000 |
commit | 0b561a0ea1384db214c274f45b160c538d2ab65d (patch) | |
tree | aad71a937464551ac28cae53e36820f669431980 /synapse/rest | |
parent | Use changelog from develop (diff) | |
parent | Fix wording (diff) | |
download | synapse-0b561a0ea1384db214c274f45b160c538d2ab65d.tar.xz |
Merge branch 'release-v1.52'
Diffstat (limited to '')
-rw-r--r-- | synapse/rest/admin/__init__.py | 8 | ||||
-rw-r--r-- | synapse/rest/admin/federation.py | 100 | ||||
-rw-r--r-- | synapse/rest/admin/rooms.py | 39 | ||||
-rw-r--r-- | synapse/rest/client/account_data.py | 2 | ||||
-rw-r--r-- | synapse/rest/client/register.py | 23 | ||||
-rw-r--r-- | synapse/rest/client/room.py | 39 | ||||
-rw-r--r-- | synapse/rest/client/sync.py | 3 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_html.py | 31 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_url_resource.py | 224 |
9 files changed, 369 insertions, 100 deletions
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 465e06772b..9be9e33c8e 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -41,7 +41,9 @@ from synapse.rest.admin.event_reports import ( EventReportsRestServlet, ) from synapse.rest.admin.federation import ( - DestinationsRestServlet, + DestinationMembershipRestServlet, + DestinationResetConnectionRestServlet, + DestinationRestServlet, ListDestinationsRestServlet, ) from synapse.rest.admin.groups import DeleteGroupAdminRestServlet @@ -267,7 +269,9 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ListRegistrationTokensRestServlet(hs).register(http_server) NewRegistrationTokenRestServlet(hs).register(http_server) RegistrationTokenRestServlet(hs).register(http_server) - DestinationsRestServlet(hs).register(http_server) + DestinationMembershipRestServlet(hs).register(http_server) + DestinationResetConnectionRestServlet(hs).register(http_server) + DestinationRestServlet(hs).register(http_server) ListDestinationsRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py index 8cd3fa189e..d162e0081e 100644 --- a/synapse/rest/admin/federation.py +++ b/synapse/rest/admin/federation.py @@ -16,6 +16,7 @@ from http import HTTPStatus from typing import TYPE_CHECKING, Tuple from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.federation.transport.server import Authenticator from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin @@ -90,7 +91,7 @@ class ListDestinationsRestServlet(RestServlet): return HTTPStatus.OK, response -class DestinationsRestServlet(RestServlet): +class DestinationRestServlet(RestServlet): """Get details of a destination. This needs user to have administrator access in Synapse. @@ -145,3 +146,100 @@ class DestinationsRestServlet(RestServlet): } return HTTPStatus.OK, response + + +class DestinationMembershipRestServlet(RestServlet): + """Get list of rooms of a destination. + This needs user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations/<destination>/rooms?from=0&limit=10 + + returns: + 200 OK with a list of rooms if success otherwise an error. + + The parameters `from` and `limit` are required only for pagination. + By default, a `limit` of 100 is used. + """ + + PATTERNS = admin_patterns("/federation/destinations/(?P<destination>[^/]*)/rooms$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, destination: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + if not await self._store.is_destination_known(destination): + raise NotFoundError("Unknown destination") + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + direction = parse_string(request, "dir", default="f", allowed_values=("f", "b")) + + rooms, total = await self._store.get_destination_rooms_paginate( + destination, start, limit, direction + ) + response = {"rooms": rooms, "total": total} + if (start + limit) < total: + response["next_token"] = str(start + len(rooms)) + + return HTTPStatus.OK, response + + +class DestinationResetConnectionRestServlet(RestServlet): + """Reset destinations' connection timeouts and wake it up. + This needs user to have administrator access in Synapse. + + POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection + {} + + returns: + 200 OK otherwise an error. + """ + + PATTERNS = admin_patterns( + "/federation/destinations/(?P<destination>[^/]+)/reset_connection$" + ) + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + self._authenticator = Authenticator(hs) + + async def on_POST( + self, request: SynapseRequest, destination: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + if not await self._store.is_destination_known(destination): + raise NotFoundError("Unknown destination") + + retry_timings = await self._store.get_destination_retry_timings(destination) + if not (retry_timings and retry_timings.retry_last_ts): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "The retry timing does not need to be reset for this destination.", + ) + + # reset timings and wake up + await self._authenticator.reset_retry_timings(destination) + + return HTTPStatus.OK, {} diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index efe25fe7eb..5b706efbcf 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -729,7 +729,7 @@ class RoomEventContextServlet(RestServlet): else: event_filter = None - results = await self.room_context_handler.get_event_context( + event_context = await self.room_context_handler.get_event_context( requester, room_id, event_id, @@ -738,25 +738,34 @@ class RoomEventContextServlet(RestServlet): use_admin_priviledge=True, ) - if not results: + if not event_context: raise SynapseError( HTTPStatus.NOT_FOUND, "Event not found.", errcode=Codes.NOT_FOUND ) time_now = self.clock.time_msec() - aggregations = results.pop("aggregations", None) - results["events_before"] = self._event_serializer.serialize_events( - results["events_before"], time_now, bundle_aggregations=aggregations - ) - results["event"] = self._event_serializer.serialize_event( - results["event"], time_now, bundle_aggregations=aggregations - ) - results["events_after"] = self._event_serializer.serialize_events( - results["events_after"], time_now, bundle_aggregations=aggregations - ) - results["state"] = self._event_serializer.serialize_events( - results["state"], time_now - ) + results = { + "events_before": self._event_serializer.serialize_events( + event_context.events_before, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "event": self._event_serializer.serialize_event( + event_context.event, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "events_after": self._event_serializer.serialize_events( + event_context.events_after, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "state": self._event_serializer.serialize_events( + event_context.state, time_now + ), + "start": event_context.start, + "end": event_context.end, + } return HTTPStatus.OK, results diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py index d1badbdf3b..58b8adbd32 100644 --- a/synapse/rest/client/account_data.py +++ b/synapse/rest/client/account_data.py @@ -66,7 +66,7 @@ class AccountDataServlet(RestServlet): raise AuthError(403, "Cannot get account data for other users.") event = await self.store.get_global_account_data_by_type_for_user( - account_data_type, user_id + user_id, account_data_type ) if event is None: diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 8b56c76aed..e3492f9f93 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -339,12 +339,19 @@ class UsernameAvailabilityRestServlet(RestServlet): ), ) + self.inhibit_user_in_use_error = ( + hs.config.registration.inhibit_user_in_use_error + ) + async def on_GET(self, request: Request) -> Tuple[int, JsonDict]: if not self.hs.config.registration.enable_registration: raise SynapseError( 403, "Registration has been disabled", errcode=Codes.FORBIDDEN ) + if self.inhibit_user_in_use_error: + return 200, {"available": True} + ip = request.getClientIP() with self.ratelimiter.ratelimit(ip) as wait_deferred: await wait_deferred @@ -418,10 +425,14 @@ class RegisterRestServlet(RestServlet): self.ratelimiter = hs.get_registration_ratelimiter() self.password_policy_handler = hs.get_password_policy_handler() self.clock = hs.get_clock() + self.password_auth_provider = hs.get_password_auth_provider() self._registration_enabled = self.hs.config.registration.enable_registration self._refresh_tokens_enabled = ( hs.config.registration.refreshable_access_token_lifetime is not None ) + self._inhibit_user_in_use_error = ( + hs.config.registration.inhibit_user_in_use_error + ) self._registration_flows = _calculate_registration_flows( hs.config, self.auth_handler @@ -564,6 +575,7 @@ class RegisterRestServlet(RestServlet): desired_username, guest_access_token=guest_access_token, assigned_user_id=registered_user_id, + inhibit_user_in_use_error=self._inhibit_user_in_use_error, ) # Check if the user-interactive authentication flows are complete, if @@ -627,7 +639,16 @@ class RegisterRestServlet(RestServlet): if not password_hash: raise SynapseError(400, "Missing params: password", Codes.MISSING_PARAM) - desired_username = params.get("username", None) + desired_username = await ( + self.password_auth_provider.get_username_for_registration( + auth_result, + params, + ) + ) + + if desired_username is None: + desired_username = params.get("username", None) + guest_access_token = params.get("guest_access_token", None) if desired_username is not None: diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 90bb9142a0..90355e44b2 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -706,27 +706,36 @@ class RoomEventContextServlet(RestServlet): else: event_filter = None - results = await self.room_context_handler.get_event_context( + event_context = await self.room_context_handler.get_event_context( requester, room_id, event_id, limit, event_filter ) - if not results: + if not event_context: raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) time_now = self.clock.time_msec() - aggregations = results.pop("aggregations", None) - results["events_before"] = self._event_serializer.serialize_events( - results["events_before"], time_now, bundle_aggregations=aggregations - ) - results["event"] = self._event_serializer.serialize_event( - results["event"], time_now, bundle_aggregations=aggregations - ) - results["events_after"] = self._event_serializer.serialize_events( - results["events_after"], time_now, bundle_aggregations=aggregations - ) - results["state"] = self._event_serializer.serialize_events( - results["state"], time_now - ) + results = { + "events_before": self._event_serializer.serialize_events( + event_context.events_before, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "event": self._event_serializer.serialize_event( + event_context.event, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "events_after": self._event_serializer.serialize_events( + event_context.events_after, + time_now, + bundle_aggregations=event_context.aggregations, + ), + "state": self._event_serializer.serialize_events( + event_context.state, time_now + ), + "start": event_context.start, + "end": event_context.end, + } return 200, results diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index d20ae1421e..f9615da525 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -48,6 +48,7 @@ from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.logging.opentracing import trace +from synapse.storage.databases.main.relations import BundledAggregations from synapse.types import JsonDict, StreamToken from synapse.util import json_decoder @@ -526,7 +527,7 @@ class SyncRestServlet(RestServlet): def serialize( events: Iterable[EventBase], - aggregations: Optional[Dict[str, Dict[str, Any]]] = None, + aggregations: Optional[Dict[str, BundledAggregations]] = None, ) -> List[JsonDict]: return self._event_serializer.serialize_events( events, diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index 30b067dd42..872a9e72e8 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -321,14 +321,33 @@ def _iterate_over_text( def rebase_url(url: str, base: str) -> str: - base_parts = list(urlparse.urlparse(base)) + """ + Resolves a potentially relative `url` against an absolute `base` URL. + + For example: + + >>> rebase_url("subpage", "https://example.com/foo/") + 'https://example.com/foo/subpage' + >>> rebase_url("sibling", "https://example.com/foo") + 'https://example.com/sibling' + >>> rebase_url("/bar", "https://example.com/foo/") + 'https://example.com/bar' + >>> rebase_url("https://alice.com/a/", "https://example.com/foo/") + 'https://alice.com/a' + """ + base_parts = urlparse.urlparse(base) + # Convert the parsed URL to a list for (potential) modification. url_parts = list(urlparse.urlparse(url)) - if not url_parts[0]: # fix up schema - url_parts[0] = base_parts[0] or "http" - if not url_parts[1]: # fix up hostname - url_parts[1] = base_parts[1] + # Add a scheme, if one does not exist. + if not url_parts[0]: + url_parts[0] = base_parts.scheme or "http" + # Fix up the hostname, if this is not a data URL. + if url_parts[0] != "data" and not url_parts[1]: + url_parts[1] = base_parts.netloc + # If the path does not start with a /, nest it under the base path's last + # directory. if not url_parts[2].startswith("/"): - url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2] + url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2] return urlparse.urlunparse(url_parts) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e8881bc870..efd84ced8f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -21,8 +21,9 @@ import re import shutil import sys import traceback -from typing import TYPE_CHECKING, Iterable, Optional, Tuple +from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple from urllib import parse as urlparse +from urllib.request import urlopen import attr @@ -71,6 +72,17 @@ IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY @attr.s(slots=True, frozen=True, auto_attribs=True) +class DownloadResult: + length: int + uri: str + response_code: int + media_type: str + download_name: Optional[str] + expires: int + etag: Optional[str] + + +@attr.s(slots=True, frozen=True, auto_attribs=True) class MediaInfo: """ Information parsed from downloading media being previewed. @@ -256,7 +268,7 @@ class PreviewUrlResource(DirectServeJsonResource): if oembed_url: url_to_download = oembed_url - media_info = await self._download_url(url_to_download, user) + media_info = await self._handle_url(url_to_download, user) logger.debug("got media_info of '%s'", media_info) @@ -297,7 +309,9 @@ class PreviewUrlResource(DirectServeJsonResource): oembed_url = self._oembed.autodiscover_from_html(tree) og_from_oembed: JsonDict = {} if oembed_url: - oembed_info = await self._download_url(oembed_url, user) + oembed_info = await self._handle_url( + oembed_url, user, allow_data_urls=True + ) ( og_from_oembed, author_name, @@ -367,7 +381,135 @@ class PreviewUrlResource(DirectServeJsonResource): return jsonog.encode("utf8") - async def _download_url(self, url: str, user: UserID) -> MediaInfo: + async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult: + """ + Fetches a remote URL and parses the headers. + + Args: + url: The URL to fetch. + output_stream: The stream to write the content to. + + Returns: + A tuple of: + Media length, URL downloaded, the HTTP response code, + the media type, the downloaded file name, the number of + milliseconds the result is valid for, the etag header. + """ + + try: + logger.debug("Trying to get preview for url '%s'", url) + length, headers, uri, code = await self.client.get_file( + url, + output_stream=output_stream, + max_size=self.max_spider_size, + headers={"Accept-Language": self.url_preview_accept_language}, + ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. + raise + except DNSLookupError: + # DNS lookup returned no results + # Note: This will also be the case if one of the resolved IP + # addresses is blacklisted + raise SynapseError( + 502, + "DNS resolution failure during URL preview generation", + Codes.UNKNOWN, + ) + except Exception as e: + # FIXME: pass through 404s and other error messages nicely + logger.warning("Error downloading %s: %r", url, e) + + raise SynapseError( + 500, + "Failed to download content: %s" + % (traceback.format_exception_only(sys.exc_info()[0], e),), + Codes.UNKNOWN, + ) + + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" + + download_name = get_filename_from_headers(headers) + + # FIXME: we should calculate a proper expiration based on the + # Cache-Control and Expire headers. But for now, assume 1 hour. + expires = ONE_HOUR + etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None + + return DownloadResult( + length, uri, code, media_type, download_name, expires, etag + ) + + async def _parse_data_url( + self, url: str, output_stream: BinaryIO + ) -> DownloadResult: + """ + Parses a data: URL. + + Args: + url: The URL to parse. + output_stream: The stream to write the content to. + + Returns: + A tuple of: + Media length, URL downloaded, the HTTP response code, + the media type, the downloaded file name, the number of + milliseconds the result is valid for, the etag header. + """ + + try: + logger.debug("Trying to parse data url '%s'", url) + with urlopen(url) as url_info: + # TODO Can this be more efficient. + output_stream.write(url_info.read()) + except Exception as e: + logger.warning("Error parsing data: URL %s: %r", url, e) + + raise SynapseError( + 500, + "Failed to parse data URL: %s" + % (traceback.format_exception_only(sys.exc_info()[0], e),), + Codes.UNKNOWN, + ) + + return DownloadResult( + # Read back the length that has been written. + length=output_stream.tell(), + uri=url, + # If it was parsed, consider this a 200 OK. + response_code=200, + # urlopen shoves the media-type from the data URL into the content type + # header object. + media_type=url_info.headers.get_content_type(), + # Some features are not supported by data: URLs. + download_name=None, + expires=ONE_HOUR, + etag=None, + ) + + async def _handle_url( + self, url: str, user: UserID, allow_data_urls: bool = False + ) -> MediaInfo: + """ + Fetches content from a URL and parses the result to generate a MediaInfo. + + It uses the media storage provider to persist the fetched content and + stores the mapping into the database. + + Args: + url: The URL to fetch. + user: The user who ahs requested this URL. + allow_data_urls: True if data URLs should be allowed. + + Returns: + A MediaInfo object describing the fetched content. + """ + # TODO: we should probably honour robots.txt... except in practice # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? @@ -377,61 +519,27 @@ class PreviewUrlResource(DirectServeJsonResource): file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True) with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - logger.debug("Trying to get preview for url '%s'", url) - length, headers, uri, code = await self.client.get_file( - url, - output_stream=f, - max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, - ) - except SynapseError: - # Pass SynapseErrors through directly, so that the servlet - # handler will return a SynapseError to the client instead of - # blank data or a 500. - raise - except DNSLookupError: - # DNS lookup returned no results - # Note: This will also be the case if one of the resolved IP - # addresses is blacklisted - raise SynapseError( - 502, - "DNS resolution failure during URL preview generation", - Codes.UNKNOWN, - ) - except Exception as e: - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading %s: %r", url, e) - - raise SynapseError( - 500, - "Failed to download content: %s" - % (traceback.format_exception_only(sys.exc_info()[0], e),), - Codes.UNKNOWN, - ) - await finish() + if url.startswith("data:"): + if not allow_data_urls: + raise SynapseError( + 500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN + ) - if b"Content-Type" in headers: - media_type = headers[b"Content-Type"][0].decode("ascii") + download_result = await self._parse_data_url(url, f) else: - media_type = "application/octet-stream" + download_result = await self._download_url(url, f) - download_name = get_filename_from_headers(headers) - - # FIXME: we should calculate a proper expiration based on the - # Cache-Control and Expire headers. But for now, assume 1 hour. - expires = ONE_HOUR - etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None + await finish() try: time_now_ms = self.clock.time_msec() await self.store.store_local_media( media_id=file_id, - media_type=media_type, + media_type=download_result.media_type, time_now_ms=time_now_ms, - upload_name=download_name, - media_length=length, + upload_name=download_result.download_name, + media_length=download_result.length, user_id=user, url_cache=url, ) @@ -444,16 +552,16 @@ class PreviewUrlResource(DirectServeJsonResource): raise return MediaInfo( - media_type=media_type, - media_length=length, - download_name=download_name, + media_type=download_result.media_type, + media_length=download_result.length, + download_name=download_result.download_name, created_ts_ms=time_now_ms, filesystem_id=file_id, filename=fname, - uri=uri, - response_code=code, - expires=expires, - etag=etag, + uri=download_result.uri, + response_code=download_result.response_code, + expires=download_result.expires, + etag=download_result.etag, ) async def _precache_image_url( @@ -474,8 +582,8 @@ class PreviewUrlResource(DirectServeJsonResource): # FIXME: it might be cleaner to use the same flow as the main /preview_url # request itself and benefit from the same caching etc. But for now we # just rely on the caching on the master request to speed things up. - image_info = await self._download_url( - rebase_url(og["og:image"], media_info.uri), user + image_info = await self._handle_url( + rebase_url(og["og:image"], media_info.uri), user, allow_data_urls=True ) if _is_media(image_info.media_type): |