summary refs log tree commit diff
diff options
context:
space:
mode:
authorMark Haines <mark.haines@matrix.org>2015-08-28 11:15:27 +0100
committerMark Haines <mark.haines@matrix.org>2015-08-28 11:15:27 +0100
commita7122692d972ce3ac787d4ecf1449f87f33e83cf (patch)
tree81ece372c764d775d1d10cc581100de006456d67
parentMerge pull request #254 from matrix-org/markjh/tox_setuptools (diff)
parentBump version and changelog (diff)
downloadsynapse-a7122692d972ce3ac787d4ecf1449f87f33e83cf.tar.xz
Merge branch 'release-v0.10.0' into develop
Conflicts:
	synapse/handlers/auth.py
	synapse/python_dependencies.py
	synapse/rest/client/v1/login.py
-rw-r--r--CHANGES.rst44
-rw-r--r--synapse/__init__.py2
-rwxr-xr-xsynapse/app/homeserver.py29
-rw-r--r--synapse/config/__main__.py30
-rw-r--r--synapse/config/_base.py57
-rw-r--r--synapse/config/server.py1
-rw-r--r--synapse/handlers/_base.py32
-rw-r--r--synapse/handlers/events.py10
-rw-r--r--synapse/handlers/typing.py14
-rw-r--r--synapse/notifier.py7
-rw-r--r--synapse/push/__init__.py8
-rw-r--r--synapse/python_dependencies.py12
-rw-r--r--synapse/rest/media/v1/base_resource.py54
-rw-r--r--synapse/rest/media/v1/upload_resource.py12
-rw-r--r--synapse/storage/event_federation.py10
-rw-r--r--synapse/storage/state.py11
-rw-r--r--synapse/util/stringutils.py2
17 files changed, 262 insertions, 73 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index e6d1a37307..8b9916c960 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,4 +1,43 @@
-Changes in synapse v0.10.0-rc1 (2015-08-20)
+Changes in synapse v0.10.0-rc5 (2015-08-27)
+===========================================
+
+* Fix bug that broke downloading files with ascii filenames across federation.
+
+Changes in synapse v0.10.0-rc4 (2015-08-27)
+===========================================
+
+* Allow UTF-8 filenames for upload. (PR #259)
+
+Changes in synapse v0.10.0-rc3 (2015-08-25)
+===========================================
+
+* Add ``--keys-directory`` config option to specify where files such as
+  certs and signing keys should be stored in, when using ``--generate-config``
+  or ``--generate-keys``. (PR #250)
+* Allow ``--config-path`` to specify a directory, causing synapse to use all
+  \*.yaml files in the directory as config files. (PR #249)
+* Add ``web_client_location`` config option to specify static files to be
+  hosted by synapse under ``/_matrix/client``. (PR #245)
+* Add helper utility to synapse to read and parse the config files and extract
+  the value of a given key. For example::
+
+    $ python -m synapse.config read server_name -c homeserver.yaml
+    localhost
+
+  (PR #246)
+
+
+Changes in synapse v0.10.0-rc2 (2015-08-24)
+===========================================
+
+* Fix bug where we incorrectly populated the ``event_forward_extremities``
+  table, resulting in problems joining large remote rooms (e.g.
+  ``#matrix:matrix.org``)
+* Reduce the number of times we wake up pushers by not listening for presence
+  or typing events, reducing the CPU cost of each pusher.
+
+
+Changes in synapse v0.10.0-rc1 (2015-08-21)
 ===========================================
 
 Also see v0.9.4-rc1 changelog, which has been amalgamated into this release.
@@ -10,6 +49,9 @@ General:
   (PR #208)
 * Add support for logging in with email address (PR #234)
 * Add support for new ``m.room.canonical_alias`` event. (PR #233)
+* Change synapse to treat user IDs case insensitively during registration and
+  login. (If two users already exist with case insensitive matching user ids,
+  synapse will continue to require them to specify their user ids exactly.)
 * Error if a user tries to register with an email already in use. (PR #211)
 * Add extra and improve existing caches  (PR #212, #219, #226, #228)
 * Batch various storage request (PR #226, #228)
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 5853165a21..57b8304d35 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -16,4 +16,4 @@
 """ This is a reference implementation of a Matrix home server.
 """
 
-__version__ = "0.10.0-rc1"
+__version__ = "0.10.0-rc5"
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index f04493f92a..ff7807c2e6 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -16,7 +16,7 @@
 
 import sys
 sys.dont_write_bytecode = True
-from synapse.python_dependencies import check_requirements
+from synapse.python_dependencies import check_requirements, DEPENDENCY_LINKS
 
 if __name__ == '__main__':
     check_requirements()
@@ -97,9 +97,25 @@ class SynapseHomeServer(HomeServer):
         return JsonResource(self)
 
     def build_resource_for_web_client(self):
-        import syweb
-        syweb_path = os.path.dirname(syweb.__file__)
-        webclient_path = os.path.join(syweb_path, "webclient")
+        webclient_path = self.get_config().web_client_location
+        if not webclient_path:
+            try:
+                import syweb
+            except ImportError:
+                quit_with_error(
+                    "Could not find a webclient.\n\n"
+                    "Please either install the matrix-angular-sdk or configure\n"
+                    "the location of the source to serve via the configuration\n"
+                    "option `web_client_location`\n\n"
+                    "To install the `matrix-angular-sdk` via pip, run:\n\n"
+                    "    pip install '%(dep)s'\n"
+                    "\n"
+                    "You can also disable hosting of the webclient via the\n"
+                    "configuration option `web_client`\n"
+                    % {"dep": DEPENDENCY_LINKS["matrix-angular-sdk"]}
+                )
+            syweb_path = os.path.dirname(syweb.__file__)
+            webclient_path = os.path.join(syweb_path, "webclient")
         # GZip is disabled here due to
         # https://twistedmatrix.com/trac/ticket/7678
         # (It can stay enabled for the API resources: they call
@@ -259,11 +275,10 @@ class SynapseHomeServer(HomeServer):
 
 def quit_with_error(error_string):
     message_lines = error_string.split("\n")
-    line_length = max([len(l) for l in message_lines]) + 2
+    line_length = max([len(l) for l in message_lines if len(l) < 80]) + 2
     sys.stderr.write("*" * line_length + '\n')
     for line in message_lines:
-        if line.strip():
-            sys.stderr.write(" %s\n" % (line.strip(),))
+        sys.stderr.write(" %s\n" % (line.rstrip(),))
     sys.stderr.write("*" * line_length + '\n')
     sys.exit(1)
 
diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py
new file mode 100644
index 0000000000..f822d12036
--- /dev/null
+++ b/synapse/config/__main__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if __name__ == "__main__":
+    import sys
+    from homeserver import HomeServerConfig
+
+    action = sys.argv[1]
+
+    if action == "read":
+        key = sys.argv[2]
+        config = HomeServerConfig.load_config("", sys.argv[3:])
+
+        print getattr(config, key)
+        sys.exit(0)
+    else:
+        sys.stderr.write("Unknown command %r\n" % (action,))
+        sys.exit(1)
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 73f6959959..d01235d31f 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -131,7 +131,8 @@ class Config(object):
             "-c", "--config-path",
             action="append",
             metavar="CONFIG_FILE",
-            help="Specify config file"
+            help="Specify config file. Can be given multiple times and"
+                 " may specify directories containing *.yaml files."
         )
         config_parser.add_argument(
             "--generate-config",
@@ -144,6 +145,13 @@ class Config(object):
             help="Generate any missing key files then exit"
         )
         config_parser.add_argument(
+            "--keys-directory",
+            metavar="DIRECTORY",
+            help="Used with 'generate-*' options to specify where files such as"
+                 " certs and signing keys should be stored in, unless explicitly"
+                 " specified in the config."
+        )
+        config_parser.add_argument(
             "-H", "--server-name",
             help="The server name to generate a config file for"
         )
@@ -151,16 +159,46 @@ class Config(object):
 
         generate_keys = config_args.generate_keys
 
+        config_files = []
+        if config_args.config_path:
+            for config_path in config_args.config_path:
+                if os.path.isdir(config_path):
+                    # We accept specifying directories as config paths, we search
+                    # inside that directory for all files matching *.yaml, and then
+                    # we apply them in *sorted* order.
+                    files = []
+                    for entry in os.listdir(config_path):
+                        entry_path = os.path.join(config_path, entry)
+                        if not os.path.isfile(entry_path):
+                            print (
+                                "Found subdirectory in config directory: %r. IGNORING."
+                            ) % (entry_path, )
+                            continue
+
+                        if not entry.endswith(".yaml"):
+                            print (
+                                "Found file in config directory that does not"
+                                " end in '.yaml': %r. IGNORING."
+                            ) % (entry_path, )
+                            continue
+
+                    config_files.extend(sorted(files))
+                else:
+                    config_files.append(config_path)
+
         if config_args.generate_config:
-            if not config_args.config_path:
+            if not config_files:
                 config_parser.error(
                     "Must supply a config file.\nA config file can be automatically"
                     " generated using \"--generate-config -H SERVER_NAME"
                     " -c CONFIG-FILE\""
                 )
-            (config_path,) = config_args.config_path
+            (config_path,) = config_files
             if not os.path.exists(config_path):
-                config_dir_path = os.path.dirname(config_path)
+                if config_args.keys_directory:
+                    config_dir_path = config_args.keys_directory
+                else:
+                    config_dir_path = os.path.dirname(config_path)
                 config_dir_path = os.path.abspath(config_dir_path)
 
                 server_name = config_args.server_name
@@ -202,19 +240,22 @@ class Config(object):
         obj.invoke_all("add_arguments", parser)
         args = parser.parse_args(remaining_args)
 
-        if not config_args.config_path:
+        if not config_files:
             config_parser.error(
                 "Must supply a config file.\nA config file can be automatically"
                 " generated using \"--generate-config -H SERVER_NAME"
                 " -c CONFIG-FILE\""
             )
 
-        config_dir_path = os.path.dirname(config_args.config_path[-1])
+        if config_args.keys_directory:
+            config_dir_path = config_args.keys_directory
+        else:
+            config_dir_path = os.path.dirname(config_args.config_path[-1])
         config_dir_path = os.path.abspath(config_dir_path)
 
         specified_config = {}
-        for config_path in config_args.config_path:
-            yaml_config = cls.read_config_file(config_path)
+        for config_file in config_files:
+            yaml_config = cls.read_config_file(config_file)
             specified_config.update(yaml_config)
 
         server_name = specified_config["server_name"]
diff --git a/synapse/config/server.py b/synapse/config/server.py
index f9a3b5f15b..a03e55c223 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -22,6 +22,7 @@ class ServerConfig(Config):
         self.server_name = config["server_name"]
         self.pid_file = self.abspath(config.get("pid_file"))
         self.web_client = config["web_client"]
+        self.web_client_location = config.get("web_client_location", None)
         self.soft_file_limit = config["soft_file_limit"]
         self.daemonize = config.get("daemonize")
         self.print_pidfile = config.get("print_pidfile")
diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py
index e91f1129db..cb992143f5 100644
--- a/synapse/handlers/_base.py
+++ b/synapse/handlers/_base.py
@@ -107,6 +107,22 @@ class BaseHandler(object):
         if not suppress_auth:
             self.auth.check(event, auth_events=context.current_state)
 
+        if event.type == EventTypes.CanonicalAlias:
+            # Check the alias is acually valid (at this time at least)
+            room_alias_str = event.content.get("alias", None)
+            if room_alias_str:
+                room_alias = RoomAlias.from_string(room_alias_str)
+                directory_handler = self.hs.get_handlers().directory_handler
+                mapping = yield directory_handler.get_association(room_alias)
+
+                if mapping["room_id"] != event.room_id:
+                    raise SynapseError(
+                        400,
+                        "Room alias %s does not point to the room" % (
+                            room_alias_str,
+                        )
+                    )
+
         (event_stream_id, max_stream_id) = yield self.store.persist_event(
             event, context=context
         )
@@ -130,22 +146,6 @@ class BaseHandler(object):
                         returned_invite.signatures
                     )
 
-        if event.type == EventTypes.CanonicalAlias:
-            # Check the alias is acually valid (at this time at least)
-            room_alias_str = event.content.get("alias", None)
-            if room_alias_str:
-                room_alias = RoomAlias.from_string(room_alias_str)
-                directory_handler = self.hs.get_handlers().directory_handler
-                mapping = yield directory_handler.get_association(room_alias)
-
-                if mapping["room_id"] != event.room_id:
-                    raise SynapseError(
-                        400,
-                        "Room alias %s does not point to the room" % (
-                            room_alias_str,
-                        )
-                    )
-
         destinations = set(extra_destinations)
         for k, s in context.current_state.items():
             try:
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index f9ca2f8634..891502c04f 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -49,7 +49,12 @@ class EventStreamHandler(BaseHandler):
     @defer.inlineCallbacks
     @log_function
     def get_stream(self, auth_user_id, pagin_config, timeout=0,
-                   as_client_event=True, affect_presence=True):
+                   as_client_event=True, affect_presence=True,
+                   only_room_events=False):
+        """Fetches the events stream for a given user.
+
+        If `only_room_events` is `True` only room events will be returned.
+        """
         auth_user = UserID.from_string(auth_user_id)
 
         try:
@@ -89,7 +94,8 @@ class EventStreamHandler(BaseHandler):
                 timeout = random.randint(int(timeout*0.9), int(timeout*1.1))
 
             events, tokens = yield self.notifier.get_events_for(
-                auth_user, room_ids, pagin_config, timeout
+                auth_user, room_ids, pagin_config, timeout,
+                only_room_events=only_room_events
             )
 
             time_now = self.clock.time_msec()
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 026bd2b9d4..d7096aab8c 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -204,15 +204,11 @@ class TypingNotificationHandler(BaseHandler):
             )
 
     def _push_update_local(self, room_id, user, typing):
-        if room_id not in self._room_serials:
-            self._room_serials[room_id] = 0
-            self._room_typing[room_id] = set()
-
-        room_set = self._room_typing[room_id]
+        room_set = self._room_typing.setdefault(room_id, set())
         if typing:
             room_set.add(user)
-        elif user in room_set:
-            room_set.remove(user)
+        else:
+            room_set.discard(user)
 
         self._latest_room_serial += 1
         self._room_serials[room_id] = self._latest_room_serial
@@ -260,8 +256,8 @@ class TypingNotificationEventSource(object):
         )
 
         events = []
-        for room_id in handler._room_serials:
-            if room_id not in joined_room_ids:
+        for room_id in joined_room_ids:
+            if room_id not in handler._room_serials:
                 continue
             if handler._room_serials[room_id] <= from_key:
                 continue
diff --git a/synapse/notifier.py b/synapse/notifier.py
index dbd8efe9fb..f998fc83bf 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -328,10 +328,13 @@ class Notifier(object):
         defer.returnValue(result)
 
     @defer.inlineCallbacks
-    def get_events_for(self, user, rooms, pagination_config, timeout):
+    def get_events_for(self, user, rooms, pagination_config, timeout,
+                       only_room_events=False):
         """ For the given user and rooms, return any new events for them. If
         there are no new events wait for up to `timeout` milliseconds for any
         new events to happen before returning.
+
+        If `only_room_events` is `True` only room events will be returned.
         """
         from_token = pagination_config.from_token
         if not from_token:
@@ -352,6 +355,8 @@ class Notifier(object):
                 after_id = getattr(after_token, keyname)
                 if before_id == after_id:
                     continue
+                if only_room_events and name != "room":
+                    continue
                 new_events, new_key = yield source.get_new_events_for_user(
                     user, getattr(from_token, keyname), limit,
                 )
diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py
index 13002e0db4..f1952b5a0f 100644
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -249,7 +249,9 @@ class Pusher(object):
             # we fail to dispatch the push)
             config = PaginationConfig(from_token=None, limit='1')
             chunk = yield self.evStreamHandler.get_stream(
-                self.user_name, config, timeout=0)
+                self.user_name, config, timeout=0, affect_presence=False,
+                only_room_events=True
+            )
             self.last_token = chunk['end']
             self.store.update_pusher_last_token(
                 self.app_id, self.pushkey, self.user_name, self.last_token
@@ -280,8 +282,8 @@ class Pusher(object):
         config = PaginationConfig(from_token=from_tok, limit='1')
         timeout = (300 + random.randint(-60, 60)) * 1000
         chunk = yield self.evStreamHandler.get_stream(
-            self.user_name, config,
-            timeout=timeout, affect_presence=False
+            self.user_name, config, timeout=timeout, affect_presence=False,
+            only_room_events=True
         )
 
         # limiting to 1 may get 1 event plus 1 presence event, so
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 4c07aa07aa..8ec272fd5f 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -46,8 +46,8 @@ CONDITIONAL_REQUIREMENTS = {
 
 def requirements(config=None, include_conditional=False):
     reqs = REQUIREMENTS.copy()
-    for key, req in CONDITIONAL_REQUIREMENTS.items():
-        if (config and getattr(config, key)) or include_conditional:
+    if include_conditional:
+        for _, req in CONDITIONAL_REQUIREMENTS.items():
             reqs.update(req)
     return reqs
 
@@ -55,13 +55,13 @@ def requirements(config=None, include_conditional=False):
 def github_link(project, version, egg):
     return "https://github.com/%s/tarball/%s/#egg=%s" % (project, version, egg)
 
-DEPENDENCY_LINKS = [
-    github_link(
+DEPENDENCY_LINKS = {
+    "matrix-angular-sdk": github_link(
         project="matrix-org/matrix-angular-sdk",
         version="v0.6.6",
         egg="matrix_angular_sdk-0.6.6",
     ),
-]
+}
 
 
 class MissingRequirementError(Exception):
@@ -129,7 +129,7 @@ def check_requirements(config=None):
 def list_requirements():
     result = []
     linked = []
-    for link in DEPENDENCY_LINKS:
+    for link in DEPENDENCY_LINKS.values():
         egg = link.split("#egg=")[1]
         linked.append(egg.split('-')[0])
         result.append(link)
diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py
index 4e21527c3d..b2aeb8c909 100644
--- a/synapse/rest/media/v1/base_resource.py
+++ b/synapse/rest/media/v1/base_resource.py
@@ -33,6 +33,8 @@ import os
 
 import cgi
 import logging
+import urllib
+import urlparse
 
 logger = logging.getLogger(__name__)
 
@@ -42,10 +44,13 @@ def parse_media_id(request):
         # This allows users to append e.g. /test.png to the URL. Useful for
         # clients that parse the URL to see content type.
         server_name, media_id = request.postpath[:2]
-        if len(request.postpath) > 2 and is_ascii(request.postpath[-1]):
-            return server_name, media_id, request.postpath[-1]
-        else:
-            return server_name, media_id, None
+        file_name = None
+        if len(request.postpath) > 2:
+            try:
+                file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8")
+            except UnicodeDecodeError:
+                pass
+        return server_name, media_id, file_name
     except:
         raise SynapseError(
             404,
@@ -140,9 +145,26 @@ class BaseMediaResource(Resource):
             content_disposition = headers.get("Content-Disposition", None)
             if content_disposition:
                 _, params = cgi.parse_header(content_disposition[0],)
-                upload_name = params.get("filename", None)
-                if upload_name and not is_ascii(upload_name):
-                    upload_name = None
+                upload_name = None
+
+                # First check if there is a valid UTF-8 filename
+                upload_name_utf8 = params.get("filename*", None)
+                if upload_name_utf8:
+                    if upload_name_utf8.lower().startswith("utf-8''"):
+                        upload_name = upload_name_utf8[7:]
+
+                # If there isn't check for an ascii name.
+                if not upload_name:
+                    upload_name_ascii = params.get("filename", None)
+                    if upload_name_ascii and is_ascii(upload_name_ascii):
+                        upload_name = upload_name_ascii
+
+                if upload_name:
+                    upload_name = urlparse.unquote(upload_name)
+                    try:
+                        upload_name = upload_name.decode("utf-8")
+                    except UnicodeDecodeError:
+                        upload_name = None
             else:
                 upload_name = None
 
@@ -181,10 +203,20 @@ class BaseMediaResource(Resource):
         if os.path.isfile(file_path):
             request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
             if upload_name:
-                request.setHeader(
-                    b"Content-Disposition",
-                    b"inline; filename=%s" % (upload_name.encode("utf-8"),),
-                )
+                if is_ascii(upload_name):
+                    request.setHeader(
+                        b"Content-Disposition",
+                        b"inline; filename=%s" % (
+                            urllib.quote(upload_name.encode("utf-8")),
+                        ),
+                    )
+                else:
+                    request.setHeader(
+                        b"Content-Disposition",
+                        b"inline; filename*=utf-8''%s" % (
+                            urllib.quote(upload_name.encode("utf-8")),
+                        ),
+                    )
 
             # cache for at least a day.
             # XXX: we might want to turn this off for data we don't want to
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 439d5a30a8..6abaf56b25 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -15,7 +15,7 @@
 
 from synapse.http.server import respond_with_json, request_handler
 
-from synapse.util.stringutils import random_string, is_ascii
+from synapse.util.stringutils import random_string
 from synapse.api.errors import SynapseError
 
 from twisted.web.server import NOT_DONE_YET
@@ -86,9 +86,13 @@ class UploadResource(BaseMediaResource):
 
         upload_name = request.args.get("filename", None)
         if upload_name:
-            upload_name = upload_name[0]
-            if upload_name and not is_ascii(upload_name):
-                raise SynapseError(400, "filename must be ascii")
+            try:
+                upload_name = upload_name[0].decode('UTF-8')
+            except UnicodeDecodeError:
+                raise SynapseError(
+                    msg="Invalid UTF-8 filename parameter: %r" % (upload_name),
+                    code=400,
+                )
 
         headers = request.requestHeaders
 
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index bc90e17c63..989ad340b0 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -331,7 +331,10 @@ class EventFederationStore(SQLBaseStore):
 
         txn.executemany(
             query,
-            [(ev.event_id, ev.room_id, ev.event_id) for ev in events]
+            [
+                (ev.event_id, ev.room_id, ev.event_id) for ev in events
+                if not ev.internal_metadata.is_outlier()
+            ]
         )
 
         query = (
@@ -358,7 +361,10 @@ class EventFederationStore(SQLBaseStore):
         )
         txn.executemany(
             query,
-            [(ev.event_id, ev.room_id) for ev in events]
+            [
+                (ev.event_id, ev.room_id) for ev in events
+                if not ev.internal_metadata.is_outlier()
+            ]
         )
 
         for room_id in events_by_room:
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index c9110e6304..9630efcfcc 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -403,8 +403,15 @@ class StateStore(SQLBaseStore):
                 state_dict = results[group]
 
             for event_id in state_ids:
-                state_event = state_events[event_id]
-                state_dict[(state_event.type, state_event.state_key)] = state_event
+                try:
+                    state_event = state_events[event_id]
+                    state_dict[(state_event.type, state_event.state_key)] = state_event
+                except KeyError:
+                    # Hmm. So we do don't have that state event? Interesting.
+                    logger.warn(
+                        "Can't find state event %r for state group %r",
+                        event_id, group,
+                    )
 
             self._state_group_cache.update(
                 cache_seq_num,
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 7a1e96af37..f3a36340e4 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -38,6 +38,8 @@ def random_string_with_symbols(length):
 def is_ascii(s):
     try:
         s.encode("ascii")
+    except UnicodeEncodeError:
+        return False
     except UnicodeDecodeError:
         return False
     else: