summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
author: Hillery Shay <shaysquared@gmail.com> 2021-09-22 08:25:26 -0700
committer: GitHub <noreply@github.com> 2021-09-22 08:25:26 -0700
commit: f78b68a96b1f179043b38b4109e09fa0a315643d (patch)
tree: 385b7aaa1987b730e15eb396dedaa71d4ac0557c /synapse/storage
parent: Fix invalidating OTK count cache after claim (#10875) (diff)
download: synapse-f78b68a96b1f179043b38b4109e09fa0a315643d.tar.xz
Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) (#10820)
* add test to check if null code points are being inserted

* add logic to detect and replace null code points before insertion into db

* lints

* add license to test

* change approach to null substitution

* add type hint for SearchEntry

* Add changelog entry

Signed-off-by: H.Shay <shaysquared@gmail.com>

* updated changelog

* update changelog message

* remove duplicate changelog

* Update synapse/storage/databases/main/events.py remove extra space

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* rename and move test file, update tests, delete old test file

* fix typo in comments

* update _find_highlights_in_postgres to replace null byte with space

* replace null byte in sqlite search insertion

* beef up and reorganize test for this pr

* update changelog

* add type hints and update docstring

* check db engine directly vs using env variable

* refactor tests to be less repetitive

* move replace logic into separate function

* requested changes

* Fix typo.

* Update synapse/storage/databases/main/search.py

Co-authored-by: reivilibre <olivier@librepush.net>

* Update changelog.d/10820.misc

Co-authored-by: Aaron Raimist <aaron@raim.ist>

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: reivilibre <olivier@librepush.net>
Co-authored-by: Aaron Raimist <aaron@raim.ist>
Diffstat (limited to 'synapse/storage')
-rw-r--r-- synapse/storage/databases/main/search.py 34
1 files changed, 25 insertions, 9 deletions
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py

index 6480d5a9f5..2a1e99e17a 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -15,12 +15,12 @@
 import logging
 import re
 from collections import namedtuple
-from typing import Collection, List, Optional, Set
+from typing import Collection, Iterable, List, Optional, Set

 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
-from synapse.storage.database import DatabasePool
+from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine

@@ -32,14 +32,24 @@ SearchEntry = namedtuple(
 )


+def _clean_value_for_search(value: str) -> str:
+    """
+    Replaces any null code points in the string with spaces as
+    Postgres and SQLite do not like the insertion of strings with
+    null code points into the full-text search tables.
+    """
+    return value.replace("\u0000", " ")
+
+
 class SearchWorkerStore(SQLBaseStore):
-    def store_search_entries_txn(self, txn, entries):
+    def store_search_entries_txn(
+        self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
+    ) -> None:
         """Add entries to the search table

         Args:
-            txn (cursor):
-            entries (iterable[SearchEntry]):
-                entries to be added to the table
+            txn:
+            entries: entries to be added to the table
         """
         if not self.hs.config.enable_search:
             return
@@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore):
                     entry.event_id,
                     entry.room_id,
                     entry.key,
-                    entry.value,
+                    _clean_value_for_search(entry.value),
                     entry.stream_ordering,
                     entry.origin_server_ts,
                 )
@@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore):
                 " VALUES (?,?,?,?)"
             )
             args = (
-                (entry.event_id, entry.room_id, entry.key, entry.value)
+                (
+                    entry.event_id,
+                    entry.room_id,
+                    entry.key,
+                    _clean_value_for_search(entry.value),
+                )
                 for entry in entries
             )
-
             txn.execute_batch(sql, args)
+
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore):
                 for key in ("body", "name", "topic"):
                     v = event.content.get(key, None)
                     if v:
+                        v = _clean_value_for_search(v)
                         values.append(v)

                 if not values: