summary refs log tree commit diff
path: root/synapse/util/stringutils.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/util/stringutils.py')
-rw-r--r--synapse/util/stringutils.py102
1 files changed, 35 insertions, 67 deletions
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 982c6d81ca..08c86e92b8 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,16 +13,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import itertools
 import random
+import re
 import string
+from collections import Iterable
 
-import six
-from six import PY2, PY3
-from six.moves import range
+from synapse.api.errors import Codes, SynapseError
 
 _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@"
 
+# https://matrix.org/docs/spec/client_server/r0.6.0#post-matrix-client-r0-register-email-requesttoken
+# Note: The : character is allowed here for older clients, but will be removed in a
+# future release. Context: https://github.com/matrix-org/synapse/issues/6766
+client_secret_regex = re.compile(r"^[0-9a-zA-Z\.\=\_\-\:]+$")
+
 # random_string and random_string_with_symbols are used for a range of things,
 # some cryptographically important, some less so. We use SystemRandom to make sure
 # we get cryptographically-secure randoms.
@@ -37,75 +43,37 @@ def random_string_with_symbols(length):
 
 
 def is_ascii(s):
-
-    if PY3:
-        if isinstance(s, bytes):
-            try:
-                s.decode("ascii").encode("ascii")
-            except UnicodeDecodeError:
-                return False
-            except UnicodeEncodeError:
-                return False
-            return True
-
-    try:
-        s.encode("ascii")
-    except UnicodeEncodeError:
-        return False
-    except UnicodeDecodeError:
-        return False
-    else:
+    if isinstance(s, bytes):
+        try:
+            s.decode("ascii").encode("ascii")
+        except UnicodeDecodeError:
+            return False
+        except UnicodeEncodeError:
+            return False
         return True
 
 
-def to_ascii(s):
-    """Converts a string to ascii if it is ascii, otherwise leave it alone.
-
-    If given None then will return None.
-    """
-    if PY3:
-        return s
-
-    if s is None:
-        return None
+def assert_valid_client_secret(client_secret):
+    """Validate that a given string matches the client_secret regex defined by the spec"""
+    if client_secret_regex.match(client_secret) is None:
+        raise SynapseError(
+            400, "Invalid client_secret parameter", errcode=Codes.INVALID_PARAM
+        )
 
-    try:
-        return s.encode("ascii")
-    except UnicodeEncodeError:
-        return s
 
+def shortstr(iterable: Iterable, maxitems: int = 5) -> str:
+    """If iterable has maxitems or fewer, return the stringification of a list
+    containing those items.
 
-def exception_to_unicode(e):
-    """Helper function to extract the text of an exception as a unicode string
+    Otherwise, return the stringification of a list with the first maxitems items,
+    followed by "...".
 
     Args:
-        e (Exception): exception to be stringified
-
-    Returns:
-        unicode
+        iterable: iterable to truncate
+        maxitems: number of items to return before truncating
     """
-    # urgh, this is a mess. The basic problem here is that psycopg2 constructs its
-    # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will
-    # then produce the raw byte sequence. Under Python 2, this will then cause another
-    # error if it gets mixed with a `unicode` object, as per
-    # https://github.com/matrix-org/synapse/issues/4252
-
-    # First of all, if we're under python3, everything is fine because it will sort this
-    # nonsense out for us.
-    if not PY2:
-        return str(e)
-
-    # otherwise let's have a stab at decoding the exception message. We'll circumvent
-    # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii')
-    # and instead look at what is in the args member.
-
-    if len(e.args) == 0:
-        return ""
-    elif len(e.args) > 1:
-        return six.text_type(repr(e.args))
-
-    msg = e.args[0]
-    if isinstance(msg, bytes):
-        return msg.decode("utf-8", errors="replace")
-    else:
-        return msg
+
+    items = list(itertools.islice(iterable, maxitems + 1))
+    if len(items) <= maxitems:
+        return str(items)
+    return "[" + ", ".join(repr(r) for r in items[:maxitems]) + ", ...]"