diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc
new file mode 100644
index 0000000000..4373bf6f6b
--- /dev/null
+++ b/changelog.d/10820.misc
@@ -0,0 +1 @@
+Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 6480d5a9f5..2a1e99e17a 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -15,12 +15,12 @@
import logging
import re
from collections import namedtuple
-from typing import Collection, List, Optional, Set
+from typing import Collection, Iterable, List, Optional, Set
from synapse.api.errors import SynapseError
from synapse.events import EventBase
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
-from synapse.storage.database import DatabasePool
+from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
@@ -32,14 +32,24 @@ SearchEntry = namedtuple(
)
+def _clean_value_for_search(value: str) -> str:
+    """
+    Returns a copy of `value` in which every null code point (U+0000) has
+    been swapped for a single space, because Postgres and SQLite do not like
+    strings with null code points going into the full-text search tables.
+    """
+    return value.translate({0x0000: " "})
+
+
class SearchWorkerStore(SQLBaseStore):
- def store_search_entries_txn(self, txn, entries):
+ def store_search_entries_txn(
+ self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
+ ) -> None:
"""Add entries to the search table
Args:
- txn (cursor):
- entries (iterable[SearchEntry]):
- entries to be added to the table
+ txn:
+ entries: entries to be added to the table
"""
if not self.hs.config.enable_search:
return
@@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore):
entry.event_id,
entry.room_id,
entry.key,
- entry.value,
+ _clean_value_for_search(entry.value),
entry.stream_ordering,
entry.origin_server_ts,
)
@@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore):
" VALUES (?,?,?,?)"
)
args = (
- (entry.event_id, entry.room_id, entry.key, entry.value)
+ (
+ entry.event_id,
+ entry.room_id,
+ entry.key,
+ _clean_value_for_search(entry.value),
+ )
for entry in entries
)
-
txn.execute_batch(sql, args)
+
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
@@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore):
for key in ("body", "name", "topic"):
v = event.content.get(key, None)
if v:
+ v = _clean_value_for_search(v)
values.append(v)
if not values:
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
new file mode 100644
index 0000000000..8971ecccbd
--- /dev/null
+++ b/tests/storage/test_room_search.py
@@ -0,0 +1,74 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import synapse.rest.admin
+from synapse.rest.client import login, room
+from synapse.storage.engines import PostgresEngine
+
+from tests.unittest import HomeserverTestCase
+
+
+class NullByteInsertionTest(HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    def test_null_byte(self) -> None:
+        """
+        Postgres/SQLite don't like null bytes going into the search tables, so
+        internally we replace those with a space.
+
+        Ensure a message with a null byte can still be sent and searched for.
+        """
+
+        # Register a user, log them in and create a room to send messages into
+        self.register_user("alice", "password")
+        access_token = self.login("alice", "password")
+        room_id = self.helper.create_room_as("alice", tok=access_token)
+
+        # Send messages (the first one contains a null byte) and ensure they
+        # don't cause an internal server error
+        for body in ["hi\u0000bob", "another message", "hi alice"]:
+            response = self.helper.send(room_id, body, tok=access_token)
+            self.assertIn("event_id", response)
+
+        # Check that search works for the message where the null byte was replaced
+        store = self.hs.get_datastore()
+        result = self.get_success(
+            store.search_msgs([room_id], "hi bob", ["content.body"])
+        )
+        self.assertEqual(result.get("count"), 1)
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("hi", result.get("highlights"))
+            self.assertIn("bob", result.get("highlights"))
+
+        # Check that search works for an unrelated message
+        result = self.get_success(
+            store.search_msgs([room_id], "another", ["content.body"])
+        )
+        self.assertEqual(result.get("count"), 1)
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("another", result.get("highlights"))
+
+        # Check that search works for a search term that overlaps with the message
+        # containing a null byte and an unrelated message.
+        result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"]))
+        self.assertEqual(result.get("count"), 2)
+        result = self.get_success(
+            store.search_msgs([room_id], "hi alice", ["content.body"])
+        )
+        if isinstance(store.database_engine, PostgresEngine):
+            self.assertIn("alice", result.get("highlights"))
|