diff options
36 files changed, 689 insertions, 176 deletions
diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..ca333961f3 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,12 @@ +[run] +branch = True +parallel = True +source = synapse + +[paths] +source= + coverage + +[report] +precision = 2 +ignore_errors = True diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md index 21acb3202a..756759c2d8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md @@ -1,3 +1,9 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + <!-- **IF YOU HAVE SUPPORT QUESTIONS ABOUT RUNNING OR CONFIGURING YOUR OWN HOME SERVER**: @@ -11,38 +17,50 @@ the necessary data to fix your issue. You can also preview your report before submitting it. You may remove sections that aren't relevant to your particular case. -Text between <!-- and --​> marks will be invisible in the report. +Text between <!-- and --> marks will be invisible in the report. --> ### Description -Describe here the problem that you are experiencing, or the feature you are requesting. +<!-- Describe here the problem that you are experiencing --> ### Steps to reproduce -- For bugs, list the steps +- list the steps - that reproduce the bug - using hyphens as bullet points +<!-- Describe how what happens differs from what you expected. -<!-- If you can identify any relevant log snippets from _homeserver.log_, please include +If you can identify any relevant log snippets from _homeserver.log_, please include those (please be careful to remove any personal or private data). Please surround them with -``` (three backticks, on a line on their own), so that they are formatted legibly. --> +``` (three backticks, on a line on their own), so that they are formatted legibly. +--> ### Version information <!-- IMPORTANT: please answer the following questions, to help us narrow down the problem --> -- **Homeserver**: Was this issue identified on matrix.org or another homeserver? +<!-- Was this issue identified on matrix.org or another homeserver? --> +- **Homeserver**: If not matrix.org: -- **Version**: What version of Synapse is running? <!-- + +<!-- +What version of Synapse is running? You can find the Synapse version by inspecting the server headers (replace matrix.org with your own homeserver domain): $ curl -v https://matrix.org/_matrix/client/versions 2>&1 | grep "Server:" --> -- **Install method**: package manager/git clone/pip -- **Platform**: Tell us about the environment in which your homeserver is operating - - distro, hardware, if it's running in a vm/container, etc. +- **Version**: + +- **Install method**: +<!-- examples: package manager/git clone/pip --> + +- **Platform**: +<!-- +Tell us about the environment in which your homeserver is operating +distro, hardware, if it's running in a vm/container, etc. +--> diff --git a/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md b/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md new file mode 100644 index 0000000000..150a46f505 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md @@ -0,0 +1,9 @@ +--- +name: Feature request +about: Suggest an idea for this project + +--- + +**Description:** + +<!-- Describe here the feature you are requesting. --> diff --git a/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md b/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md new file mode 100644 index 0000000000..77581596c4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md @@ -0,0 +1,9 @@ +--- +name: Support request +about: I need support for Synapse + +--- + +# Please ask for support in [**#matrix:matrix.org**](https://matrix.to/#/#matrix:matrix.org) + +## Don't file an issue as a support request. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000..aa883ba505 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +### Pull Request Checklist + +<!-- Please read CONTRIBUTING.rst before submitting your pull request --> + +* [ ] Pull request is based on the develop branch +* [ ] Pull request includes a [changelog file](CONTRIBUTING.rst#changelog) +* [ ] Pull request includes a [sign off](CONTRIBUTING.rst#sign-off) diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md new file mode 100644 index 0000000000..7a4244f673 --- /dev/null +++ b/.github/SUPPORT.md @@ -0,0 +1,3 @@ +[**#matrix:matrix.org**](https://matrix.to/#/#matrix:matrix.org) is the official support room for Matrix, and can be accessed by any client from https://matrix.org/docs/projects/try-matrix-now.html + +It can also be access via IRC bridge at irc://irc.freenode.net/matrix or on the web here: https://webchat.freenode.net/?channels=matrix diff --git a/MANIFEST.in b/MANIFEST.in index 25cdf0a61b..d0e49713da 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -34,6 +34,7 @@ prune .github prune demo/etc prune docker prune .circleci +prune .coveragerc exclude jenkins* recursive-exclude jenkins *.sh diff --git a/README.rst b/README.rst index 9165db8319..e52b776902 100644 --- a/README.rst +++ b/README.rst @@ -142,7 +142,7 @@ Installing prerequisites on openSUSE:: Installing prerequisites on OpenBSD:: doas pkg_add python libffi py-pip py-setuptools sqlite3 py-virtualenv \ - libxslt + libxslt jpeg To install the Synapse homeserver run:: @@ -729,9 +729,10 @@ port: .. __: `key_management`_ -* Synapse does not currently support SNI on the federation protocol - (`bug #1491 <https://github.com/matrix-org/synapse/issues/1491>`_), which - means that using name-based virtual hosting is unreliable. +* Until v0.33.3, Synapse did not support SNI on the federation port + (`bug #1491 <https://github.com/matrix-org/synapse/issues/1491>`_). This bug + is now fixed, but means that federating with older servers can be unreliable + when using name-based virtual hosting. Furthermore, a number of the normal reasons for using a reverse-proxy do not apply: diff --git a/changelog.d/3830.feature b/changelog.d/3830.feature new file mode 100644 index 0000000000..af472cf763 --- /dev/null +++ b/changelog.d/3830.feature @@ -0,0 +1 @@ +Add option to track MAU stats (but not limit people) diff --git a/changelog.d/4176.bugfix b/changelog.d/4176.bugfix new file mode 100644 index 0000000000..3846f8a27b --- /dev/null +++ b/changelog.d/4176.bugfix @@ -0,0 +1 @@ +The media repository now no longer fails to decode UTF-8 filenames when downloading remote media. diff --git a/changelog.d/4180.misc b/changelog.d/4180.misc new file mode 100644 index 0000000000..80194b3dc0 --- /dev/null +++ b/changelog.d/4180.misc @@ -0,0 +1 @@ +A coveragerc file, as well as the py36-coverage tox target, have been added. diff --git a/changelog.d/4182.misc b/changelog.d/4182.misc new file mode 100644 index 0000000000..62949a065a --- /dev/null +++ b/changelog.d/4182.misc @@ -0,0 +1 @@ +Add a GitHub pull request template and add multiple issue templates diff --git a/changelog.d/4183.bugfix b/changelog.d/4183.bugfix new file mode 100644 index 0000000000..3e9ba3826f --- /dev/null +++ b/changelog.d/4183.bugfix @@ -0,0 +1 @@ +URL previews now correctly decode non-UTF-8 text if the header contains a `<meta http-equiv="Content-Type"` header. diff --git a/changelog.d/4188.misc b/changelog.d/4188.misc new file mode 100644 index 0000000000..80c3995870 --- /dev/null +++ b/changelog.d/4188.misc @@ -0,0 +1 @@ +Update README to reflect the fact that #1491 is fixed diff --git a/changelog.d/4192.bugfix b/changelog.d/4192.bugfix new file mode 100644 index 0000000000..f346fe026a --- /dev/null +++ b/changelog.d/4192.bugfix @@ -0,0 +1 @@ +Fix an issue where public consent URLs had two slashes. diff --git a/changelog.d/4193.misc b/changelog.d/4193.misc new file mode 100644 index 0000000000..23d86eab24 --- /dev/null +++ b/changelog.d/4193.misc @@ -0,0 +1 @@ +Add missing `jpeg` package prerequisite for OpenBSD in README. diff --git a/changelog.d/4197.bugfix b/changelog.d/4197.bugfix new file mode 100644 index 0000000000..c7c01da0f3 --- /dev/null +++ b/changelog.d/4197.bugfix @@ -0,0 +1 @@ +Fallback auth now accepts the session parameter on Python 3. diff --git a/changelog.d/4200.misc b/changelog.d/4200.misc new file mode 100644 index 0000000000..505c98edcf --- /dev/null +++ b/changelog.d/4200.misc @@ -0,0 +1 @@ +Add a note saying you need to manually reclaim disk space after using the Purge History API diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 2da833c827..a5c3dc8149 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -61,3 +61,11 @@ the following: } The status will be one of ``active``, ``complete``, or ``failed``. + +Reclaim disk space (Postgres) +----------------------------- + +To reclaim the disk space and return it to the operating system, you need to run +`VACUUM FULL;` on the database. + +https://www.postgresql.org/docs/current/sql-vacuum.html diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 415374a2ce..3e4dea2f19 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -535,7 +535,7 @@ def run(hs): current_mau_count = 0 reserved_count = 0 store = hs.get_datastore() - if hs.config.limit_usage_by_mau: + if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: current_mau_count = yield store.get_monthly_active_count() reserved_count = yield store.get_registered_reserved_users_count() current_mau_gauge.set(float(current_mau_count)) diff --git a/synapse/config/server.py b/synapse/config/server.py index c1c7c0105e..5ff9ac288d 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -77,6 +77,7 @@ class ServerConfig(Config): self.max_mau_value = config.get( "max_mau_value", 0, ) + self.mau_stats_only = config.get("mau_stats_only", False) self.mau_limits_reserved_threepids = config.get( "mau_limit_reserved_threepids", [] @@ -372,6 +373,11 @@ class ServerConfig(Config): # max_mau_value: 50 # mau_trial_days: 2 # + # If enabled, the metrics for the number of monthly active users will + # be populated, however no one will be limited. If limit_usage_by_mau + # is true, this is implied to be true. + # mau_stats_only: False + # # Sometimes the server admin will want to ensure certain accounts are # never blocked by mau checking. These accounts are specified here. # diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index a958c45271..c6e89db4bc 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -473,7 +473,7 @@ class AuthHandler(BaseHandler): "version": self.hs.config.user_consent_version, "en": { "name": self.hs.config.user_consent_policy_name, - "url": "%s/_matrix/consent?v=%s" % ( + "url": "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py index a8d8ed6590..fa73bdf3a1 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/v2_alpha/auth.py @@ -21,7 +21,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import SynapseError from synapse.api.urls import CLIENT_V2_ALPHA_PREFIX from synapse.http.server import finish_request -from synapse.http.servlet import RestServlet +from synapse.http.servlet import RestServlet, parse_string from ._base import client_v2_patterns @@ -131,16 +131,12 @@ class AuthRestServlet(RestServlet): self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_handlers().registration_handler - @defer.inlineCallbacks def on_GET(self, request, stagetype): - yield - if stagetype == LoginType.RECAPTCHA: - if ('session' not in request.args or - len(request.args['session']) == 0): - raise SynapseError(400, "No session supplied") - - session = request.args["session"][0] + session = parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + if stagetype == LoginType.RECAPTCHA: html = RECAPTCHA_TEMPLATE % { 'session': session, 'myurl': "%s/auth/%s/fallback/web" % ( @@ -155,13 +151,11 @@ class AuthRestServlet(RestServlet): request.write(html_bytes) finish_request(request) - defer.returnValue(None) + return None elif stagetype == LoginType.TERMS: - session = request.args['session'][0] - html = TERMS_TEMPLATE % { 'session': session, - 'terms_url': "%s/_matrix/consent?v=%s" % ( + 'terms_url': "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), @@ -176,25 +170,25 @@ class AuthRestServlet(RestServlet): request.write(html_bytes) finish_request(request) - defer.returnValue(None) + return None else: raise SynapseError(404, "Unknown auth stage type") @defer.inlineCallbacks def on_POST(self, request, stagetype): - yield + + session = parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + if stagetype == LoginType.RECAPTCHA: - if ('g-recaptcha-response' not in request.args or - len(request.args['g-recaptcha-response'])) == 0: - raise SynapseError(400, "No captcha response supplied") - if ('session' not in request.args or - len(request.args['session'])) == 0: - raise SynapseError(400, "No session supplied") + response = parse_string(request, "g-recaptcha-response") - session = request.args['session'][0] + if not response: + raise SynapseError(400, "No captcha response supplied") authdict = { - 'response': request.args['g-recaptcha-response'][0], + 'response': response, 'session': session, } @@ -242,7 +236,7 @@ class AuthRestServlet(RestServlet): else: html = TERMS_TEMPLATE % { 'session': session, - 'terms_url': "%s/_matrix/consent?v=%s" % ( + 'terms_url': "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 76e479afa3..efe42a429d 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os +from six import PY3 from six.moves import urllib from twisted.internet import defer @@ -48,26 +49,21 @@ def parse_media_id(request): return server_name, media_id, file_name except Exception: raise SynapseError( - 404, - "Invalid media id token %r" % (request.postpath,), - Codes.UNKNOWN, + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN ) def respond_404(request): respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, ) @defer.inlineCallbacks -def respond_with_file(request, media_type, file_path, - file_size=None, upload_name=None): +def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): @@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): return urllib.parse.quote(x.encode("utf-8")) request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename=%s" % (_quote(upload_name),) else: - disposition = ( - "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - request.setHeader(b"Content-Disposition", disposition) + request.setHeader(b"Content-Disposition", disposition.encode('ascii')) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to # recommend caching as it's sensitive or private - or at least # select private. don't bother setting Expires as all our # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + request.setHeader(b"Content-Length", b"%d" % (file_size,)) @defer.inlineCallbacks @@ -153,6 +144,7 @@ class Responder(object): Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + def write_to_consumer(self, consumer): """Stream response into consumer @@ -186,9 +178,18 @@ class FileInfo(object): thumbnail_method (str) thumbnail_type (str): Content type of thumbnail, e.g. image/png """ - def __init__(self, server_name, file_id, url_cache=False, - thumbnail=False, thumbnail_width=None, thumbnail_height=None, - thumbnail_method=None, thumbnail_type=None): + + def __init__( + self, + server_name, + file_id, + url_cache=False, + thumbnail=False, + thumbnail_width=None, + thumbnail_height=None, + thumbnail_method=None, + thumbnail_type=None, + ): self.server_name = server_name self.file_id = file_id self.url_cache = url_cache @@ -197,3 +198,74 @@ class FileInfo(object): self.thumbnail_height = thumbnail_height self.thumbnail_method = thumbnail_method self.thumbnail_type = thumbnail_type + + +def get_filename_from_headers(headers): + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers (twisted.web.http_headers.Headers): The HTTP + request headers. + + Returns: + A Unicode string of the filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b'']) + + # No header, bail out. + if not content_disposition[0]: + return + + # dict of unicode: bytes, corresponding to the key value sections of the + # Content-Disposition header. + params = {} + parts = content_disposition[0].split(b";") + for i in parts: + # Split into key-value pairs, if able + # We don't care about things like `inline`, so throw it out + if b"=" not in i: + continue + + key, value = i.strip().split(b"=") + params[key.decode('ascii')] = value + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + if PY3: + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode('ascii'), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + else: + # On Python 2, we first unquote the %-encoded parts and then + # decode it strictly using UTF-8. + try: + upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') + except UnicodeDecodeError: + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + # Make sure there's no %-quoted bytes. If there is, reject it as + # non-valid ASCII. + if b"%" not in upload_name_ascii: + upload_name = upload_name_ascii.decode('ascii') + + # This may be None here, indicating we did not find a matching name. + return upload_name diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d6c5f07af0..e117836e9a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -14,14 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import errno import logging import os import shutil -from six import PY3, iteritems -from six.moves.urllib import parse as urlparse +from six import iteritems import twisted.internet.error import twisted.web.http @@ -34,14 +32,18 @@ from synapse.api.errors import ( NotFoundError, SynapseError, ) -from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import logcontext from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string -from ._base import FileInfo, respond_404, respond_with_responder +from ._base import ( + FileInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths @@ -62,7 +64,7 @@ class MediaRepository(object): def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() - self.client = MatrixFederationHttpClient(hs) + self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -397,39 +399,9 @@ class MediaRepository(object): yield finish() media_type = headers[b"Content-Type"][0].decode('ascii') - + upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - if PY3: - upload_name = urlparse.unquote(upload_name) - else: - upload_name = urlparse.unquote(upload_name.encode('ascii')) - try: - if isinstance(upload_name, bytes): - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - logger.info("Stored remote media in file %r", fname) yield self.store.store_cached_remote_media( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 91d1dafe64..d0ecf241b6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import datetime import errno import fnmatch @@ -44,15 +43,19 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.media.v1._base import get_filename_from_headers from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string from ._base import FileInfo logger = logging.getLogger(__name__) +_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) +_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) + class PreviewUrlResource(Resource): isLeaf = True @@ -223,15 +226,25 @@ class PreviewUrlResource(Resource): with open(media_info['filename'], 'rb') as file: body = file.read() - # clobber the encoding from the content-type, or default to utf-8 - # XXX: this overrides any <meta/> or XML charset headers in the body - # which may pose problems, but so far seems to work okay. - match = re.match( - r'.*; *charset="?(.*?)"?(;|$)', - media_info['media_type'], - re.I - ) - encoding = match.group(1) if match else "utf-8" + encoding = None + + # Let's try and figure out if it has an encoding set in a meta tag. + # Limit it to the first 1kb, since it ought to be in the meta tags + # at the top. + match = _charset_match.search(body[:1000]) + + # If we find a match, it should take precedence over the + # Content-Type header, so set it here. + if match: + encoding = match.group(1).decode('ascii') + + # If we don't find a match, we'll look at the HTTP Content-Type, and + # if that doesn't exist, we'll fall back to UTF-8. + if not encoding: + match = _content_type_match.match( + media_info['media_type'] + ) + encoding = match.group(1) if match else "utf-8" og = decode_and_calc_og(body, media_info['uri'], encoding) @@ -323,31 +336,7 @@ class PreviewUrlResource(Resource): media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - download_name = None - - # First check if there is a valid UTF-8 filename - download_name_utf8 = params.get("filename*", None) - if download_name_utf8: - if download_name_utf8.lower().startswith("utf-8''"): - download_name = download_name_utf8[7:] - - # If there isn't check for an ascii name. - if not download_name: - download_name_ascii = params.get("filename", None) - if download_name_ascii and is_ascii(download_name_ascii): - download_name = download_name_ascii - - if download_name: - download_name = urlparse.unquote(download_name) - try: - download_name = download_name.decode("utf-8") - except UnicodeDecodeError: - download_name = None - else: - download_name = None + download_name = get_filename_from_headers(headers) yield self.store.store_local_media( media_id=file_id, diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index cf4104dc2e..c353b11c9a 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -96,37 +96,38 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn.execute(sql, query_args) - # If MAU user count still exceeds the MAU threshold, then delete on - # a least recently active basis. - # Note it is not possible to write this query using OFFSET due to - # incompatibilities in how sqlite and postgres support the feature. - # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present - # While Postgres does not require 'LIMIT', but also does not support - # negative LIMIT values. So there is no way to write it that both can - # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if self.hs.config.limit_usage_by_mau: + # If MAU user count still exceeds the MAU threshold, then delete on + # a least recently active basis. + # Note it is not possible to write this query using OFFSET due to + # incompatibilities in how sqlite and postgres support the feature. + # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present + # While Postgres does not require 'LIMIT', but also does not support + # negative LIMIT values. So there is no way to write it that both can + # support + safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) + # Must be greater than zero for postgres + safe_guard = safe_guard if safe_guard > 0 else 0 + query_args = [safe_guard] + + base_sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? + ) + """ + # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres + # when len(reserved_users) == 0. Works fine on sqlite. + if len(self.reserved_users) > 0: + query_args.extend(self.reserved_users) + sql = base_sql + """ AND user_id NOT IN ({})""".format( + ','.join(questionmarks) ) - """ - # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres - # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ','.join(questionmarks) - ) - else: - sql = base_sql - txn.execute(sql, query_args) + else: + sql = base_sql + txn.execute(sql, query_args) yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports @@ -252,8 +253,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id(str): the user_id to query """ - - if self.hs.config.limit_usage_by_mau: + if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only: # Trial users and guests should not be included as part of MAU group is_guest = yield self.is_guest(user_id) if is_guest: @@ -271,8 +271,14 @@ class MonthlyActiveUsersStore(SQLBaseStore): # but only update if we have not previously seen the user for # LAST_SEEN_GRANULARITY ms if last_seen_timestamp is None: - count = yield self.get_monthly_active_count() - if count < self.hs.config.max_mau_value: + # In the case where mau_stats_only is True and limit_usage_by_mau is + # False, there is no point in checking get_monthly_active_count - it + # adds no value and will break the logic if max_mau_value is exceeded. + if not self.hs.config.limit_usage_by_mau: yield self.upsert_monthly_active_user(user_id) + else: + count = yield self.get_monthly_active_count() + if count < self.hs.config.max_mau_value: + yield self.upsert_monthly_active_user(user_id) elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY: yield self.upsert_monthly_active_user(user_id) diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py new file mode 100644 index 0000000000..7fa120a10f --- /dev/null +++ b/tests/rest/client/v2_alpha/test_auth.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet.defer import succeed + +from synapse.api.constants import LoginType +from synapse.rest.client.v1 import admin +from synapse.rest.client.v2_alpha import auth, register + +from tests import unittest + + +class FallbackAuthTests(unittest.HomeserverTestCase): + + servlets = [ + auth.register_servlets, + admin.register_servlets, + register.register_servlets, + ] + hijack_auth = False + + def make_homeserver(self, reactor, clock): + + config = self.default_config() + + config.enable_registration_captcha = True + config.recaptcha_public_key = "brokencake" + config.registrations_require_3pid = [] + + hs = self.setup_test_homeserver(config=config) + return hs + + def prepare(self, reactor, clock, hs): + auth_handler = hs.get_auth_handler() + + self.recaptcha_attempts = [] + + def _recaptcha(authdict, clientip): + self.recaptcha_attempts.append((authdict, clientip)) + return succeed(True) + + auth_handler.checkers[LoginType.RECAPTCHA] = _recaptcha + + @unittest.INFO + def test_fallback_captcha(self): + + request, channel = self.make_request( + "POST", + "register", + {"username": "user", "type": "m.login.password", "password": "bar"}, + ) + self.render(request) + + # Returns a 401 as per the spec + self.assertEqual(request.code, 401) + # Grab the session + session = channel.json_body["session"] + # Assert our configured public key is being given + self.assertEqual( + channel.json_body["params"]["m.login.recaptcha"]["public_key"], "brokencake" + ) + + request, channel = self.make_request( + "GET", "auth/m.login.recaptcha/fallback/web?session=" + session + ) + self.render(request) + self.assertEqual(request.code, 200) + + request, channel = self.make_request( + "POST", + "auth/m.login.recaptcha/fallback/web?session=" + + session + + "&g-recaptcha-response=a", + ) + self.render(request) + self.assertEqual(request.code, 200) + + # The recaptcha handler is called with the response given + self.assertEqual(len(self.recaptcha_attempts), 1) + self.assertEqual(self.recaptcha_attempts[0][0]["response"], "a") + + # Now we have fufilled the recaptcha fallback step, we can then send a + # request to the register API with the session in the authdict. + request, channel = self.make_request( + "POST", "register", {"auth": {"session": session}} + ) + self.render(request) + self.assertEqual(channel.code, 200) + + # We're given a registered user. + self.assertEqual(channel.json_body["user_id"], "@user:test") diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index a86901c2d8..fd131e3454 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -17,15 +17,20 @@ import os import shutil import tempfile +from binascii import unhexlify from mock import Mock +from six.moves.urllib import parse from twisted.internet import defer, reactor +from twisted.internet.defer import Deferred +from synapse.config.repository import MediaStorageProviderConfig from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend +from synapse.util.module_loader import load_module from tests import unittest @@ -83,3 +88,143 @@ class MediaStorageTests(unittest.TestCase): body = f.read() self.assertEqual(test_body, body) + + +class MediaRepoTests(unittest.HomeserverTestCase): + + hijack_auth = True + user_id = "@test:user" + + def make_homeserver(self, reactor, clock): + + self.fetches = [] + + def get_file(destination, path, output_stream, args=None, max_size=None): + """ + Returns tuple[int,dict,str,int] of file length, response headers, + absolute URI, and response code. + """ + + def write_to(r): + data, response = r + output_stream.write(data) + return response + + d = Deferred() + d.addCallback(write_to) + self.fetches.append((d, destination, path, args)) + return d + + client = Mock() + client.get_file = get_file + + self.storage_path = self.mktemp() + os.mkdir(self.storage_path) + + config = self.default_config() + config.media_store_path = self.storage_path + config.thumbnail_requirements = {} + config.max_image_pixels = 2000000 + + provider_config = { + "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend", + "store_local": True, + "store_synchronous": False, + "store_remote": True, + "config": {"directory": self.storage_path}, + } + + loaded = list(load_module(provider_config)) + [ + MediaStorageProviderConfig(False, False, False) + ] + + config.media_storage_providers = [loaded] + + hs = self.setup_test_homeserver(config=config, http_client=client) + + return hs + + def prepare(self, reactor, clock, hs): + + self.media_repo = hs.get_media_repository_resource() + self.download_resource = self.media_repo.children[b'download'] + + # smol png + self.end_content = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + def _req(self, content_disposition): + + request, channel = self.make_request( + "GET", "example.com/12345", shorthand=False + ) + request.render(self.download_resource) + self.pump() + + # We've made one fetch, to example.com, using the media URL, and asking + # the other server not to do a remote fetch + self.assertEqual(len(self.fetches), 1) + self.assertEqual(self.fetches[0][1], "example.com") + self.assertEqual( + self.fetches[0][2], "/_matrix/media/v1/download/example.com/12345" + ) + self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) + + headers = { + b"Content-Length": [b"%d" % (len(self.end_content))], + b"Content-Type": [b'image/png'], + } + if content_disposition: + headers[b"Content-Disposition"] = [content_disposition] + + self.fetches[0][0].callback( + (self.end_content, (len(self.end_content), headers)) + ) + + self.pump() + self.assertEqual(channel.code, 200) + + return channel + + def test_disposition_filename_ascii(self): + """ + If the filename is filename=<ascii> then Synapse will decode it as an + ASCII string, and use filename= in the response. + """ + channel = self._req(b"inline; filename=out.png") + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual( + headers.getRawHeaders(b"Content-Disposition"), [b"inline; filename=out.png"] + ) + + def test_disposition_filenamestar_utf8escaped(self): + """ + If the filename is filename=*utf8''<utf8 escaped> then Synapse will + correctly decode it as the UTF-8 string, and use filename* in the + response. + """ + filename = parse.quote(u"\u2603".encode('utf8')).encode('ascii') + channel = self._req(b"inline; filename*=utf-8''" + filename + b".png") + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual( + headers.getRawHeaders(b"Content-Disposition"), + [b"inline; filename*=utf-8''" + filename + b".png"], + ) + + def test_disposition_none(self): + """ + If there is no filename, one isn't passed on in the Content-Disposition + of the request. + """ + channel = self._req(None) + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 29579cf091..86c813200a 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -162,3 +162,80 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual( channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} ) + + def test_non_ascii_preview_httpequiv(self): + + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # We've made one fetch + self.assertEqual(len(self.fetches), 1) + + end_content = ( + b'<html><head>' + b'<meta http-equiv="Content-Type" content="text/html; charset=windows-1251"/>' + b'<meta property="og:title" content="\xe4\xea\xe0" />' + b'<meta property="og:description" content="hi" />' + b'</head></html>' + ) + + self.fetches[0][0].callback( + ( + end_content, + ( + len(end_content), + { + b"Content-Length": [b"%d" % (len(end_content))], + # This charset=utf-8 should be ignored, because the + # document has a meta tag overriding it. + b"Content-Type": [b'text/html; charset="utf8"'], + }, + "https://example.com", + 200, + ), + ) + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") + + def test_non_ascii_preview_content_type(self): + + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # We've made one fetch + self.assertEqual(len(self.fetches), 1) + + end_content = ( + b'<html><head>' + b'<meta property="og:title" content="\xe4\xea\xe0" />' + b'<meta property="og:description" content="hi" />' + b'</head></html>' + ) + + self.fetches[0][0].callback( + ( + end_content, + ( + len(end_content), + { + b"Content-Length": [b"%d" % (len(end_content))], + b"Content-Type": [b'text/html; charset="windows-1251"'], + }, + "https://example.com", + 200, + ), + ) + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") diff --git a/tests/server.py b/tests/server.py index 7919a1f124..ceec2f2d4e 100644 --- a/tests/server.py +++ b/tests/server.py @@ -14,6 +14,8 @@ from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import IReactorPluggableNameResolver from twisted.python.failure import Failure from twisted.test.proto_helpers import MemoryReactorClock +from twisted.web.http import unquote +from twisted.web.http_headers import Headers from synapse.http.site import SynapseRequest from synapse.util import Clock @@ -50,6 +52,15 @@ class FakeChannel(object): raise Exception("No result yet.") return int(self.result["code"]) + @property + def headers(self): + if not self.result: + raise Exception("No result yet.") + h = Headers() + for i in self.result["headers"]: + h.addRawHeader(*i) + return h + def writeHeaders(self, version, code, reason, headers): self.result["version"] = version self.result["code"] = code @@ -152,6 +163,9 @@ def make_request( path = b"/_matrix/client/r0/" + path path = path.replace(b"//", b"/") + if not path.startswith(b"/"): + path = b"/" + path + if isinstance(content, text_type): content = content.encode('utf8') @@ -161,6 +175,7 @@ def make_request( req = request(site, channel) req.process = lambda: b"" req.content = BytesIO(content) + req.postpath = list(map(unquote, path[1:].split(b'/'))) if access_token: req.requestHeaders.addRawHeader( diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 832e379a83..8664bc3d54 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -220,3 +220,28 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): self.store.user_add_threepid(user2, "email", user2_email, now, now) count = self.store.get_registered_reserved_users_count() self.assertEquals(self.get_success(count), len(threepids)) + + def test_track_monthly_users_without_cap(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = True + self.hs.config.max_mau_value = 1 # should not matter + + count = self.store.get_monthly_active_count() + self.assertEqual(0, self.get_success(count)) + + self.store.upsert_monthly_active_user("@user1:server") + self.store.upsert_monthly_active_user("@user2:server") + self.pump() + + count = self.store.get_monthly_active_count() + self.assertEqual(2, self.get_success(count)) + + def test_no_users_when_not_tracking(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = False + self.store.upsert_monthly_active_user = Mock() + + self.store.populate_monthly_active_users("@user:sever") + self.pump() + + self.store.upsert_monthly_active_user.assert_not_called() diff --git a/tests/test_mau.py b/tests/test_mau.py index 0afdeb0818..04f95c942f 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -171,6 +171,24 @@ class TestMauLimit(unittest.HomeserverTestCase): self.assertEqual(e.code, 403) self.assertEqual(e.errcode, Codes.RESOURCE_LIMIT_EXCEEDED) + def test_tracked_but_not_limited(self): + self.hs.config.max_mau_value = 1 # should not matter + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = True + + # Simply being able to create 2 users indicates that the + # limit was not reached. + token1 = self.create_user("kermit1") + self.do_sync_for_user(token1) + token2 = self.create_user("kermit2") + self.do_sync_for_user(token2) + + # We do want to verify that the number of tracked users + # matches what we want though + count = self.store.get_monthly_active_count() + self.reactor.advance(100) + self.assertEqual(2, self.successResultOf(count)) + def create_user(self, localpart): request_data = json.dumps( { diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 9ecc3ef14f..0968e86a7b 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -43,7 +43,7 @@ class TermsTestCase(unittest.HomeserverTestCase): def test_ui_auth(self): self.hs.config.user_consent_at_registration = True self.hs.config.user_consent_policy_name = "My Cool Privacy Policy" - self.hs.config.public_baseurl = "https://example.org" + self.hs.config.public_baseurl = "https://example.org/" self.hs.config.user_consent_version = "1.0" # Do a UI auth request diff --git a/tests/utils.py b/tests/utils.py index 67ab916f30..52ab762010 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -134,6 +134,7 @@ def default_config(name): config.hs_disabled_limit_type = "" config.max_mau_value = 50 config.mau_trial_days = 0 + config.mau_stats_only = False config.mau_limits_reserved_threepids = [] config.admin_contact = None config.rc_messages_per_second = 10000 diff --git a/tox.ini b/tox.ini index 03ddaeb0b7..dfd9afdd49 100644 --- a/tox.ini +++ b/tox.ini @@ -70,7 +70,7 @@ usedevelop=true usedevelop=true deps = {[base]deps} - psycopg2 + psycopg2 setenv = {[base]setenv} SYNAPSE_POSTGRES = 1 @@ -101,11 +101,22 @@ usedevelop=true [testenv:py36] usedevelop=true + +[testenv:py36-coverage] +usedevelop=true +deps = + {[base]deps} + coverage +commands = + /usr/bin/find "{toxinidir}" -name '*.pyc' -delete + python -m coverage run -m twisted.trial {env:TRIAL_FLAGS:} {posargs:tests} {env:TOXSUFFIX:} + + [testenv:py36-postgres] usedevelop=true deps = {[base]deps} - psycopg2 + psycopg2 setenv = {[base]setenv} SYNAPSE_POSTGRES = 1 |