summary refs log tree commit diff
path: root/synapse/types.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/types.py')
-rw-r--r--synapse/types.py66
1 files changed, 66 insertions, 0 deletions
diff --git a/synapse/types.py b/synapse/types.py

index 41afb27a74..d8cb64addb 100644 --- a/synapse/types.py +++ b/synapse/types.py
@@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re import string from collections import namedtuple @@ -228,6 +229,71 @@ def contains_invalid_mxid_characters(localpart): return any(c not in mxid_localpart_allowed_characters for c in localpart) +UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") + +# the following is a pattern which matches '=', and bytes which are not allowed in a mxid +# localpart. +# +# It works by: +# * building a string containing the allowed characters (excluding '=') +# * escaping every special character with a backslash (to stop '-' being interpreted as a +# range operator) +# * wrapping it in a '[^...]' regex +# * converting the whole lot to a 'bytes' sequence, so that we can use it to match +# bytes rather than strings +# +NON_MXID_CHARACTER_PATTERN = re.compile( + ("[^%s]" % ( + re.escape("".join(mxid_localpart_allowed_characters - {"="}),), + )).encode("ascii"), +) + + +def map_username_to_mxid_localpart(username, case_sensitive=False): + """Map a username onto a string suitable for a MXID + + This follows the algorithm laid out at + https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. + + Args: + username (unicode|bytes): username to be mapped + case_sensitive (bool): true if TEST and test should be mapped + onto different mxids + + Returns: + unicode: string suitable for a mxid localpart + """ + if not isinstance(username, bytes): + username = username.encode('utf-8') + + # first we sort out upper-case characters + if case_sensitive: + def f1(m): + return b"_" + m.group().lower() + + username = UPPER_CASE_PATTERN.sub(f1, username) + else: + username = username.lower() + + # then we sort out non-ascii characters + def f2(m): + g = m.group()[0] + if isinstance(g, str): + # on python 2, we need to do a ord(). On python 3, the + # byte itself will do. + g = ord(g) + return b"=%02x" % (g,) + + username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) + + # we also do the =-escaping to mxids starting with an underscore. + username = re.sub(b'^_', b'=5f', username) + + # we should now only have ascii bytes left, so can decode back to a + # unicode. + return username.decode('ascii') + + class StreamToken( namedtuple("Token", ( "room_key",