diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 415374a2ce..3e4dea2f19 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -535,7 +535,7 @@ def run(hs):
current_mau_count = 0
reserved_count = 0
store = hs.get_datastore()
- if hs.config.limit_usage_by_mau:
+ if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
current_mau_count = yield store.get_monthly_active_count()
reserved_count = yield store.get_registered_reserved_users_count()
current_mau_gauge.set(float(current_mau_count))
diff --git a/synapse/config/server.py b/synapse/config/server.py
index c1c7c0105e..5ff9ac288d 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -77,6 +77,7 @@ class ServerConfig(Config):
self.max_mau_value = config.get(
"max_mau_value", 0,
)
+ self.mau_stats_only = config.get("mau_stats_only", False)
self.mau_limits_reserved_threepids = config.get(
"mau_limit_reserved_threepids", []
@@ -372,6 +373,11 @@ class ServerConfig(Config):
# max_mau_value: 50
# mau_trial_days: 2
#
+ # If enabled, the metrics for the number of monthly active users will
+ # be populated; however, no one will be limited. If limit_usage_by_mau
+ # is true, this is implied to be true.
+ # mau_stats_only: False
+ #
# Sometimes the server admin will want to ensure certain accounts are
# never blocked by mau checking. These accounts are specified here.
#
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index a958c45271..c6e89db4bc 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -473,7 +473,7 @@ class AuthHandler(BaseHandler):
"version": self.hs.config.user_consent_version,
"en": {
"name": self.hs.config.user_consent_policy_name,
- "url": "%s/_matrix/consent?v=%s" % (
+ "url": "%s_matrix/consent?v=%s" % (
self.hs.config.public_baseurl,
self.hs.config.user_consent_version,
),
diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py
index a8d8ed6590..fa73bdf3a1 100644
--- a/synapse/rest/client/v2_alpha/auth.py
+++ b/synapse/rest/client/v2_alpha/auth.py
@@ -21,7 +21,7 @@ from synapse.api.constants import LoginType
from synapse.api.errors import SynapseError
from synapse.api.urls import CLIENT_V2_ALPHA_PREFIX
from synapse.http.server import finish_request
-from synapse.http.servlet import RestServlet
+from synapse.http.servlet import RestServlet, parse_string
from ._base import client_v2_patterns
@@ -131,16 +131,12 @@ class AuthRestServlet(RestServlet):
self.auth_handler = hs.get_auth_handler()
self.registration_handler = hs.get_handlers().registration_handler
- @defer.inlineCallbacks
def on_GET(self, request, stagetype):
- yield
- if stagetype == LoginType.RECAPTCHA:
- if ('session' not in request.args or
- len(request.args['session']) == 0):
- raise SynapseError(400, "No session supplied")
-
- session = request.args["session"][0]
+ session = parse_string(request, "session")
+ if not session:
+ raise SynapseError(400, "No session supplied")
+ if stagetype == LoginType.RECAPTCHA:
html = RECAPTCHA_TEMPLATE % {
'session': session,
'myurl': "%s/auth/%s/fallback/web" % (
@@ -155,13 +151,11 @@ class AuthRestServlet(RestServlet):
request.write(html_bytes)
finish_request(request)
- defer.returnValue(None)
+ return None
elif stagetype == LoginType.TERMS:
- session = request.args['session'][0]
-
html = TERMS_TEMPLATE % {
'session': session,
- 'terms_url': "%s/_matrix/consent?v=%s" % (
+ 'terms_url': "%s_matrix/consent?v=%s" % (
self.hs.config.public_baseurl,
self.hs.config.user_consent_version,
),
@@ -176,25 +170,25 @@ class AuthRestServlet(RestServlet):
request.write(html_bytes)
finish_request(request)
- defer.returnValue(None)
+ return None
else:
raise SynapseError(404, "Unknown auth stage type")
@defer.inlineCallbacks
def on_POST(self, request, stagetype):
- yield
+
+ session = parse_string(request, "session")
+ if not session:
+ raise SynapseError(400, "No session supplied")
+
if stagetype == LoginType.RECAPTCHA:
- if ('g-recaptcha-response' not in request.args or
- len(request.args['g-recaptcha-response'])) == 0:
- raise SynapseError(400, "No captcha response supplied")
- if ('session' not in request.args or
- len(request.args['session'])) == 0:
- raise SynapseError(400, "No session supplied")
+ response = parse_string(request, "g-recaptcha-response")
- session = request.args['session'][0]
+ if not response:
+ raise SynapseError(400, "No captcha response supplied")
authdict = {
- 'response': request.args['g-recaptcha-response'][0],
+ 'response': response,
'session': session,
}
@@ -242,7 +236,7 @@ class AuthRestServlet(RestServlet):
else:
html = TERMS_TEMPLATE % {
'session': session,
- 'terms_url': "%s/_matrix/consent?v=%s" % (
+ 'terms_url': "%s_matrix/consent?v=%s" % (
self.hs.config.public_baseurl,
self.hs.config.user_consent_version,
),
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 76e479afa3..efe42a429d 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -16,6 +16,7 @@
import logging
import os
+from six import PY3
from six.moves import urllib
from twisted.internet import defer
@@ -48,26 +49,21 @@ def parse_media_id(request):
return server_name, media_id, file_name
except Exception:
raise SynapseError(
- 404,
- "Invalid media id token %r" % (request.postpath,),
- Codes.UNKNOWN,
+ 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
)
def respond_404(request):
respond_with_json(
- request, 404,
- cs_error(
- "Not found %r" % (request.postpath,),
- code=Codes.NOT_FOUND,
- ),
- send_cors=True
+ request,
+ 404,
+ cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
+ send_cors=True,
)
@defer.inlineCallbacks
-def respond_with_file(request, media_type, file_path,
- file_size=None, upload_name=None):
+def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None):
logger.debug("Responding with %r", file_path)
if os.path.isfile(file_path):
@@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name):
file_size (int): Size in bytes of the media, if known.
upload_name (str): The name of the requested file, if any.
"""
+
def _quote(x):
return urllib.parse.quote(x.encode("utf-8"))
request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
if upload_name:
if is_ascii(upload_name):
- disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
+ disposition = "inline; filename=%s" % (_quote(upload_name),)
else:
- disposition = (
- "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
+ disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
- request.setHeader(b"Content-Disposition", disposition)
+ request.setHeader(b"Content-Disposition", disposition.encode('ascii'))
# cache for at least a day.
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
- request.setHeader(
- b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
- )
-
- request.setHeader(
- b"Content-Length", b"%d" % (file_size,)
- )
+ request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
+ request.setHeader(b"Content-Length", b"%d" % (file_size,))
@defer.inlineCallbacks
@@ -153,6 +144,7 @@ class Responder(object):
Responder is a context manager which *must* be used, so that any resources
held can be cleaned up.
"""
+
def write_to_consumer(self, consumer):
"""Stream response into consumer
@@ -186,9 +178,18 @@ class FileInfo(object):
thumbnail_method (str)
thumbnail_type (str): Content type of thumbnail, e.g. image/png
"""
- def __init__(self, server_name, file_id, url_cache=False,
- thumbnail=False, thumbnail_width=None, thumbnail_height=None,
- thumbnail_method=None, thumbnail_type=None):
+
+ def __init__(
+ self,
+ server_name,
+ file_id,
+ url_cache=False,
+ thumbnail=False,
+ thumbnail_width=None,
+ thumbnail_height=None,
+ thumbnail_method=None,
+ thumbnail_type=None,
+ ):
self.server_name = server_name
self.file_id = file_id
self.url_cache = url_cache
@@ -197,3 +198,74 @@ class FileInfo(object):
self.thumbnail_height = thumbnail_height
self.thumbnail_method = thumbnail_method
self.thumbnail_type = thumbnail_type
+
+
+def get_filename_from_headers(headers):
+    """
+    Get the filename of the downloaded file by inspecting the
+    Content-Disposition HTTP header. A `filename*=utf-8''...` parameter
+    takes precedence over a plain ASCII `filename=` one.
+
+    Args:
+        headers: dict-like, looked up via `.get()`; maps the header name
+            (bytes) to a list of raw header values (bytes).
+
+    Returns:
+        A Unicode string of the filename, or None.
+    """
+    content_disposition = headers.get(b"Content-Disposition", [b''])
+
+    # No header, bail out.
+    if not content_disposition[0]:
+        return None
+
+    # dict of unicode: bytes, corresponding to the key value sections of the
+    # Content-Disposition header.
+    params = {}
+    for part in content_disposition[0].split(b";"):
+        # Split on the first `=` only, since the value may itself contain
+        # `=`. Sections without one (e.g. `inline`) are of no interest.
+        key, sep, value = part.partition(b"=")
+        if not sep:
+            continue
+
+        value = value.strip()
+        # Unwrap quoted-string values such as `filename="foo.txt"`.
+        if len(value) >= 2 and value[:1] == b'"' and value[-1:] == b'"':
+            value = value[1:-1].replace(b"\\\\", b"\\").replace(b'\\"', b'"')
+        params[key.strip().decode('ascii')] = value
+
+    upload_name = None
+
+    # First check if there is a valid UTF-8 filename
+    upload_name_utf8 = params.get("filename*") or b""
+    if upload_name_utf8.lower().startswith(b"utf-8''"):
+        upload_name_utf8 = upload_name_utf8[7:]
+        # We have a filename*= section. This MUST be ASCII, and any UTF-8
+        # bytes are %-quoted.
+        try:
+            if PY3:
+                # Once it is decoded, we can then unquote the %-encoded
+                # parts strictly into a unicode string.
+                upload_name = urllib.parse.unquote(
+                    upload_name_utf8.decode('ascii'), errors="strict"
+                )
+            else:
+                # On Python 2, we first unquote the %-encoded parts and then
+                # decode it strictly using UTF-8.
+                upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
+        except UnicodeDecodeError:
+            # Not ASCII, or incorrect UTF-8: fall through to `filename`.
+            pass
+
+    # If there isn't one, check for an ASCII name.
+    if not upload_name:
+        upload_name_ascii = params.get("filename", None)
+        if upload_name_ascii and is_ascii(upload_name_ascii):
+            # Make sure there's no %-quoted bytes. If there is, reject it as
+            # non-valid ASCII.
+            if b"%" not in upload_name_ascii:
+                upload_name = upload_name_ascii.decode('ascii')
+
+    # This may be None here, indicating we did not find a matching name.
+    return upload_name
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index d6c5f07af0..e117836e9a 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -14,14 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import cgi
import errno
import logging
import os
import shutil
-from six import PY3, iteritems
-from six.moves.urllib import parse as urlparse
+from six import iteritems
import twisted.internet.error
import twisted.web.http
@@ -34,14 +32,18 @@ from synapse.api.errors import (
NotFoundError,
SynapseError,
)
-from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util import logcontext
from synapse.util.async_helpers import Linearizer
from synapse.util.retryutils import NotRetryingDestination
-from synapse.util.stringutils import is_ascii, random_string
+from synapse.util.stringutils import random_string
-from ._base import FileInfo, respond_404, respond_with_responder
+from ._base import (
+ FileInfo,
+ get_filename_from_headers,
+ respond_404,
+ respond_with_responder,
+)
from .config_resource import MediaConfigResource
from .download_resource import DownloadResource
from .filepath import MediaFilePaths
@@ -62,7 +64,7 @@ class MediaRepository(object):
def __init__(self, hs):
self.hs = hs
self.auth = hs.get_auth()
- self.client = MatrixFederationHttpClient(hs)
+ self.client = hs.get_http_client()
self.clock = hs.get_clock()
self.server_name = hs.hostname
self.store = hs.get_datastore()
@@ -397,39 +399,9 @@ class MediaRepository(object):
yield finish()
media_type = headers[b"Content-Type"][0].decode('ascii')
-
+ upload_name = get_filename_from_headers(headers)
time_now_ms = self.clock.time_msec()
- content_disposition = headers.get(b"Content-Disposition", None)
- if content_disposition:
- _, params = cgi.parse_header(content_disposition[0].decode('ascii'),)
- upload_name = None
-
- # First check if there is a valid UTF-8 filename
- upload_name_utf8 = params.get("filename*", None)
- if upload_name_utf8:
- if upload_name_utf8.lower().startswith("utf-8''"):
- upload_name = upload_name_utf8[7:]
-
- # If there isn't check for an ascii name.
- if not upload_name:
- upload_name_ascii = params.get("filename", None)
- if upload_name_ascii and is_ascii(upload_name_ascii):
- upload_name = upload_name_ascii
-
- if upload_name:
- if PY3:
- upload_name = urlparse.unquote(upload_name)
- else:
- upload_name = urlparse.unquote(upload_name.encode('ascii'))
- try:
- if isinstance(upload_name, bytes):
- upload_name = upload_name.decode("utf-8")
- except UnicodeDecodeError:
- upload_name = None
- else:
- upload_name = None
-
logger.info("Stored remote media in file %r", fname)
yield self.store.store_cached_remote_media(
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 91d1dafe64..d0ecf241b6 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import cgi
import datetime
import errno
import fnmatch
@@ -44,15 +43,19 @@ from synapse.http.server import (
)
from synapse.http.servlet import parse_integer, parse_string
from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.rest.media.v1._base import get_filename_from_headers
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
-from synapse.util.stringutils import is_ascii, random_string
+from synapse.util.stringutils import random_string
from ._base import FileInfo
logger = logging.getLogger(__name__)
+_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
+_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
+
class PreviewUrlResource(Resource):
isLeaf = True
@@ -223,15 +226,25 @@ class PreviewUrlResource(Resource):
with open(media_info['filename'], 'rb') as file:
body = file.read()
- # clobber the encoding from the content-type, or default to utf-8
- # XXX: this overrides any <meta/> or XML charset headers in the body
- # which may pose problems, but so far seems to work okay.
- match = re.match(
- r'.*; *charset="?(.*?)"?(;|$)',
- media_info['media_type'],
- re.I
- )
- encoding = match.group(1) if match else "utf-8"
+ encoding = None
+
+ # Let's try and figure out if it has an encoding set in a meta tag.
+ # Limit it to the first 1kb, since it ought to be in the meta tags
+ # at the top.
+ match = _charset_match.search(body[:1000])
+
+ # If we find a match, it should take precedence over the
+ # Content-Type header, so set it here.
+ if match:
+ encoding = match.group(1).decode('ascii')
+
+ # If we don't find a match, we'll look at the HTTP Content-Type, and
+ # if that doesn't exist, we'll fall back to UTF-8.
+ if not encoding:
+ match = _content_type_match.match(
+ media_info['media_type']
+ )
+ encoding = match.group(1) if match else "utf-8"
og = decode_and_calc_og(body, media_info['uri'], encoding)
@@ -323,31 +336,7 @@ class PreviewUrlResource(Resource):
media_type = "application/octet-stream"
time_now_ms = self.clock.time_msec()
- content_disposition = headers.get(b"Content-Disposition", None)
- if content_disposition:
- _, params = cgi.parse_header(content_disposition[0],)
- download_name = None
-
- # First check if there is a valid UTF-8 filename
- download_name_utf8 = params.get("filename*", None)
- if download_name_utf8:
- if download_name_utf8.lower().startswith("utf-8''"):
- download_name = download_name_utf8[7:]
-
- # If there isn't check for an ascii name.
- if not download_name:
- download_name_ascii = params.get("filename", None)
- if download_name_ascii and is_ascii(download_name_ascii):
- download_name = download_name_ascii
-
- if download_name:
- download_name = urlparse.unquote(download_name)
- try:
- download_name = download_name.decode("utf-8")
- except UnicodeDecodeError:
- download_name = None
- else:
- download_name = None
+ download_name = get_filename_from_headers(headers)
yield self.store.store_local_media(
media_id=file_id,
diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py
index cf4104dc2e..c353b11c9a 100644
--- a/synapse/storage/monthly_active_users.py
+++ b/synapse/storage/monthly_active_users.py
@@ -96,37 +96,38 @@ class MonthlyActiveUsersStore(SQLBaseStore):
txn.execute(sql, query_args)
- # If MAU user count still exceeds the MAU threshold, then delete on
- # a least recently active basis.
- # Note it is not possible to write this query using OFFSET due to
- # incompatibilities in how sqlite and postgres support the feature.
- # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
- # While Postgres does not require 'LIMIT', but also does not support
- # negative LIMIT values. So there is no way to write it that both can
- # support
- safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
- # Must be greater than zero for postgres
- safe_guard = safe_guard if safe_guard > 0 else 0
- query_args = [safe_guard]
-
- base_sql = """
- DELETE FROM monthly_active_users
- WHERE user_id NOT IN (
- SELECT user_id FROM monthly_active_users
- ORDER BY timestamp DESC
- LIMIT ?
+ if self.hs.config.limit_usage_by_mau:
+ # If MAU user count still exceeds the MAU threshold, then delete on
+ # a least recently active basis.
+ # Note it is not possible to write this query using OFFSET due to
+ # incompatibilities in how sqlite and postgres support the feature.
+ # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
+ # While Postgres does not require 'LIMIT', but also does not support
+ # negative LIMIT values. So there is no way to write it that both can
+ # support
+ safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
+ # Must be greater than zero for postgres
+ safe_guard = safe_guard if safe_guard > 0 else 0
+ query_args = [safe_guard]
+
+ base_sql = """
+ DELETE FROM monthly_active_users
+ WHERE user_id NOT IN (
+ SELECT user_id FROM monthly_active_users
+ ORDER BY timestamp DESC
+ LIMIT ?
+ )
+ """
+ # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
+ # when len(reserved_users) == 0. Works fine on sqlite.
+ if len(self.reserved_users) > 0:
+ query_args.extend(self.reserved_users)
+ sql = base_sql + """ AND user_id NOT IN ({})""".format(
+ ','.join(questionmarks)
)
- """
- # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
- # when len(reserved_users) == 0. Works fine on sqlite.
- if len(self.reserved_users) > 0:
- query_args.extend(self.reserved_users)
- sql = base_sql + """ AND user_id NOT IN ({})""".format(
- ','.join(questionmarks)
- )
- else:
- sql = base_sql
- txn.execute(sql, query_args)
+ else:
+ sql = base_sql
+ txn.execute(sql, query_args)
yield self.runInteraction("reap_monthly_active_users", _reap_users)
# It seems poor to invalidate the whole cache, Postgres supports
@@ -252,8 +253,7 @@ class MonthlyActiveUsersStore(SQLBaseStore):
Args:
user_id(str): the user_id to query
"""
-
- if self.hs.config.limit_usage_by_mau:
+ if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only:
# Trial users and guests should not be included as part of MAU group
is_guest = yield self.is_guest(user_id)
if is_guest:
@@ -271,8 +271,14 @@ class MonthlyActiveUsersStore(SQLBaseStore):
# but only update if we have not previously seen the user for
# LAST_SEEN_GRANULARITY ms
if last_seen_timestamp is None:
- count = yield self.get_monthly_active_count()
- if count < self.hs.config.max_mau_value:
+ # In the case where mau_stats_only is True and limit_usage_by_mau is
+ # False, there is no point in checking get_monthly_active_count - it
+ # adds no value and will break the logic if max_mau_value is exceeded.
+ if not self.hs.config.limit_usage_by_mau:
yield self.upsert_monthly_active_user(user_id)
+ else:
+ count = yield self.get_monthly_active_count()
+ if count < self.hs.config.max_mau_value:
+ yield self.upsert_monthly_active_user(user_id)
elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
yield self.upsert_monthly_active_user(user_id)
|