From 5fdc12f482c68e2cdbb78d7db5de2cfe621720d4 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 7 Feb 2023 01:10:54 +0100 Subject: Add `event_stream_ordering` column to membership state tables (#14979) This adds an `event_stream_ordering` column to `current_state_events`, `local_current_membership` and `room_memberships`. Each of these tables is regularly joined with the `events` table to get the stream ordering and denormalising this into each table will yield significant query performance improvements once used. Includes a background job to populate these values from the `events` table. Same idea as https://github.com/matrix-org/synapse/pull/13703. Signed off by Nick @ Beeper (@fizzadar). --- synapse/storage/databases/main/events.py | 23 +++-- .../storage/databases/main/events_bg_updates.py | 104 ++++++++++++++++++++- synapse/storage/databases/main/events_worker.py | 8 +- .../26membership_tables_event_stream_ordering.sql | 21 +++++ 4 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql (limited to 'synapse') diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1536937b67..b6cce0a7cc 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1147,11 +1147,15 @@ class PersistEventsStore: # been inserted into room_memberships. txn.execute_batch( """INSERT INTO current_state_events - (room_id, type, state_key, event_id, membership) - VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?)) + (room_id, type, state_key, event_id, membership, event_stream_ordering) + VALUES ( + ?, ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + ) """, [ - (room_id, key[0], key[1], ev_id, ev_id) + (room_id, key[0], key[1], ev_id, ev_id, ev_id) for key, ev_id in to_insert.items() ], ) @@ -1178,11 +1182,15 @@ class PersistEventsStore: if to_insert: txn.execute_batch( """INSERT INTO local_current_membership - (room_id, user_id, event_id, membership) - VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?)) + (room_id, user_id, event_id, membership, event_stream_ordering) + VALUES ( + ?, ?, ?, + (SELECT membership FROM room_memberships WHERE event_id = ?), + (SELECT stream_ordering FROM events WHERE event_id = ?) + ) """, [ - (room_id, key[1], ev_id, ev_id) + (room_id, key[1], ev_id, ev_id, ev_id) for key, ev_id in to_insert.items() if key[0] == EventTypes.Member and self.is_mine_id(key[1]) ], @@ -1790,6 +1798,7 @@ class PersistEventsStore: table="room_memberships", keys=( "event_id", + "event_stream_ordering", "user_id", "sender", "room_id", @@ -1800,6 +1809,7 @@ class PersistEventsStore: values=[ ( event.event_id, + event.internal_metadata.stream_ordering, event.state_key, event.user_id, event.room_id, @@ -1832,6 +1842,7 @@ class PersistEventsStore: keyvalues={"room_id": event.room_id, "user_id": event.state_key}, values={ "event_id": event.event_id, + "event_stream_ordering": event.internal_metadata.stream_ordering, "membership": event.membership, }, ) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index b9d3c36d60..0e81d38cca 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, ca import attr -from synapse.api.constants import EventContentFields, RelationTypes +from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -71,6 +71,10 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" + POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING = ( + "populate_membership_event_stream_ordering" + ) + @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -99,6 +103,10 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ): super().__init__(database, db_conn, hs) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING, + self._populate_membership_event_stream_ordering, + ) self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts, @@ -1498,3 +1506,97 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) return batch_size + + async def _populate_membership_event_stream_ordering( + self, progress: JsonDict, batch_size: int + ) -> int: + def _populate_membership_event_stream_ordering( + txn: LoggingTransaction, + ) -> bool: + + if "max_stream_ordering" in progress: + max_stream_ordering = progress["max_stream_ordering"] + else: + txn.execute("SELECT max(stream_ordering) FROM events") + res = txn.fetchone() + if res is None or res[0] is None: + return True + else: + max_stream_ordering = res[0] + + start = progress.get("stream_ordering", 0) + stop = start + batch_size + + sql = f""" + SELECT room_id, event_id, stream_ordering + FROM events + WHERE + type = '{EventTypes.Member}' + AND stream_ordering >= ? + AND stream_ordering < ? + """ + txn.execute(sql, (start, stop)) + + rows: List[Tuple[str, str, int]] = cast( + List[Tuple[str, str, int]], txn.fetchall() + ) + + event_ids: List[Tuple[str]] = [] + event_stream_orderings: List[Tuple[int]] = [] + + for _, event_id, event_stream_ordering in rows: + event_ids.append((event_id,)) + event_stream_orderings.append((event_stream_ordering,)) + + self.db_pool.simple_update_many_txn( + txn, + table="current_state_events", + key_names=("event_id",), + key_values=event_ids, + value_names=("event_stream_ordering",), + value_values=event_stream_orderings, + ) + + self.db_pool.simple_update_many_txn( + txn, + table="room_memberships", + key_names=("event_id",), + key_values=event_ids, + value_names=("event_stream_ordering",), + value_values=event_stream_orderings, + ) + + # NOTE: local_current_membership has no index on event_id, so only + # the room ID here will reduce the query rows read. + for room_id, event_id, event_stream_ordering in rows: + txn.execute( + """ + UPDATE local_current_membership + SET event_stream_ordering = ? + WHERE room_id = ? AND event_id = ? + """, + (event_stream_ordering, room_id, event_id), + ) + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING, + { + "stream_ordering": stop, + "max_stream_ordering": max_stream_ordering, + }, + ) + + return stop > max_stream_ordering + + finished = await self.db_pool.runInteraction( + "_populate_membership_event_stream_ordering", + _populate_membership_event_stream_ordering, + ) + + if finished: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING + ) + + return batch_size diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index d7d08369ca..6d0ef10258 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore): txn: LoggingTransaction, ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( - "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," + "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type," " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," " e.outlier" " FROM events AS e" @@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore): " LEFT JOIN event_relations USING (event_id)" " LEFT JOIN room_memberships USING (event_id)" " LEFT JOIN rejections USING (event_id)" - " WHERE ? < event_stream_ordering" - " AND event_stream_ordering <= ?" + " WHERE ? < out.event_stream_ordering" + " AND out.event_stream_ordering <= ?" " AND out.instance_name = ?" - " ORDER BY event_stream_ordering ASC" + " ORDER BY out.event_stream_ordering ASC" ) txn.execute(sql, (last_id, current_id, instance_name)) diff --git a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql new file mode 100644 index 0000000000..7c30a67fc4 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql @@ -0,0 +1,21 @@ +/* Copyright 2022 Beeper + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT; +ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT; +ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_membership_event_stream_ordering', '{}'); -- cgit 1.5.1 From d0fed7a37b8b6ce166cae856fe243757aa7c7294 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 7 Feb 2023 00:20:04 +0000 Subject: Properly typecheck types.http (#14988) * Tweak http types in Synapse AFACIS these are correct, and they make mypy happier on tests.http. * Type hints for test_proxyagent * type hints for test_srv_resolver * test_matrix_federation_agent * tests.http.server._base * tests.http.__init__ * tests.http.test_additional_resource * tests.http.test_client * tests.http.test_endpoint * tests.http.test_matrixfederationclient * tests.http.test_servlet * tests.http.test_simple_client * tests.http.test_site * One fixup in tests.server * Untyped defs * Changelog * Fixup syntax for Python 3.7 * Fix olddeps syntax * Use a twisted IPv4 addr for dummy_address * Fix typo, thanks Sean Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Remove redundant `Optional` --------- Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/14988.misc | 1 + mypy.ini | 6 +- synapse/http/client.py | 5 +- synapse/http/proxyagent.py | 3 +- tests/http/__init__.py | 19 ++- .../federation/test_matrix_federation_agent.py | 142 +++++++++++++-------- tests/http/federation/test_srv_resolver.py | 60 +++++---- tests/http/server/_base.py | 2 +- tests/http/test_additional_resource.py | 18 ++- tests/http/test_client.py | 37 ++++-- tests/http/test_endpoint.py | 4 +- tests/http/test_matrixfederationclient.py | 53 ++++---- tests/http/test_proxyagent.py | 103 +++++++++------ tests/http/test_servlet.py | 8 +- tests/http/test_simple_client.py | 14 +- tests/http/test_site.py | 8 +- tests/server.py | 6 +- 17 files changed, 298 insertions(+), 191 deletions(-) create mode 100644 changelog.d/14988.misc (limited to 'synapse') diff --git a/changelog.d/14988.misc b/changelog.d/14988.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/14988.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/mypy.ini b/mypy.ini index 93de1c97ea..11e683b704 100644 --- a/mypy.ini +++ b/mypy.ini @@ -32,9 +32,6 @@ exclude = (?x) |synapse/storage/databases/main/cache.py |synapse/storage/schema/ - |tests/http/federation/test_matrix_federation_agent.py - |tests/http/federation/test_srv_resolver.py - |tests/http/test_proxyagent.py |tests/module_api/test_api.py |tests/rest/media/v1/test_media_storage.py |tests/server.py @@ -92,6 +89,9 @@ disallow_untyped_defs = True [mypy-tests.handlers.*] disallow_untyped_defs = True +[mypy-tests.http.*] +disallow_untyped_defs = True + [mypy-tests.logging.*] disallow_untyped_defs = True diff --git a/synapse/http/client.py b/synapse/http/client.py index 4eb740c040..a05f297933 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -44,6 +44,7 @@ from twisted.internet.interfaces import ( IAddress, IDelayedCall, IHostResolution, + IReactorCore, IReactorPluggableNameResolver, IReactorTime, IResolutionReceiver, @@ -226,7 +227,9 @@ class _IPBlacklistingResolver: return recv -@implementer(ISynapseReactor) +# ISynapseReactor implies IReactorCore, but explicitly marking it this as an implementer +# of IReactorCore seems to keep mypy-zope happier. +@implementer(IReactorCore, ISynapseReactor) class BlacklistingReactorWrapper: """ A Reactor wrapper which will prevent DNS resolution to blacklisted IP diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index 18899bc6d1..94ef737b9e 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -38,7 +38,6 @@ from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse from synapse.http import redact_uri from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials -from synapse.types import ISynapseReactor logger = logging.getLogger(__name__) @@ -84,7 +83,7 @@ class ProxyAgent(_AgentBase): def __init__( self, reactor: IReactorCore, - proxy_reactor: Optional[ISynapseReactor] = None, + proxy_reactor: Optional[IReactorCore] = None, contextFactory: Optional[IPolicyForHTTPS] = None, connectTimeout: Optional[float] = None, bindAddress: Optional[bytes] = None, diff --git a/tests/http/__init__.py b/tests/http/__init__.py index 093537adef..528cdee34b 100644 --- a/tests/http/__init__.py +++ b/tests/http/__init__.py @@ -19,13 +19,15 @@ from zope.interface import implementer from OpenSSL import SSL from OpenSSL.SSL import Connection +from twisted.internet.address import IPv4Address from twisted.internet.interfaces import IOpenSSLServerConnectionCreator from twisted.internet.ssl import Certificate, trustRootFromCertificates +from twisted.protocols.tls import TLSMemoryBIOProtocol from twisted.web.client import BrowserLikePolicyForHTTPS # noqa: F401 from twisted.web.iweb import IPolicyForHTTPS # noqa: F401 -def get_test_https_policy(): +def get_test_https_policy() -> BrowserLikePolicyForHTTPS: """Get a test IPolicyForHTTPS which trusts the test CA cert Returns: @@ -39,7 +41,7 @@ def get_test_https_policy(): return BrowserLikePolicyForHTTPS(trustRoot=trust_root) -def get_test_ca_cert_file(): +def get_test_ca_cert_file() -> str: """Get the path to the test CA cert The keypair is generated with: @@ -51,7 +53,7 @@ def get_test_ca_cert_file(): return os.path.join(os.path.dirname(__file__), "ca.crt") -def get_test_key_file(): +def get_test_key_file() -> str: """get the path to the test key The key file is made with: @@ -137,15 +139,20 @@ class TestServerTLSConnectionFactory: """An SSL connection creator which returns connections which present a certificate signed by our test CA.""" - def __init__(self, sanlist): + def __init__(self, sanlist: List[bytes]): """ Args: - sanlist: list[bytes]: a list of subjectAltName values for the cert + sanlist: a list of subjectAltName values for the cert """ self._cert_file = create_test_cert_file(sanlist) - def serverConnectionForTLS(self, tlsProtocol): + def serverConnectionForTLS(self, tlsProtocol: TLSMemoryBIOProtocol) -> Connection: ctx = SSL.Context(SSL.SSLv23_METHOD) ctx.use_certificate_file(self._cert_file) ctx.use_privatekey_file(get_test_key_file()) return Connection(ctx, None) + + +# A dummy address, useful for tests that use FakeTransport and don't care about where +# packets are going to/coming from. +dummy_address = IPv4Address("TCP", "127.0.0.1", 80) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 992d8f94fd..acfdcd3bca 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -14,7 +14,7 @@ import base64 import logging import os -from typing import Iterable, Optional +from typing import Any, Awaitable, Callable, Generator, List, Optional, cast from unittest.mock import Mock, patch import treq @@ -24,14 +24,19 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet._sslverify import ClientTLSOptions, OpenSSLCertificateOptions -from twisted.internet.interfaces import IProtocolFactory +from twisted.internet.defer import Deferred +from twisted.internet.endpoints import _WrappingProtocol +from twisted.internet.interfaces import ( + IOpenSSLClientConnectionCreator, + IProtocolFactory, +) from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol from twisted.web._newclient import ResponseNeverReceived from twisted.web.client import Agent from twisted.web.http import HTTPChannel, Request from twisted.web.http_headers import Headers -from twisted.web.iweb import IPolicyForHTTPS +from twisted.web.iweb import IPolicyForHTTPS, IResponse from synapse.config.homeserver import HomeServerConfig from synapse.crypto.context_factory import FederationPolicyForHTTPS @@ -42,11 +47,21 @@ from synapse.http.federation.well_known_resolver import ( WellKnownResolver, _cache_period_from_headers, ) -from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context +from synapse.logging.context import ( + SENTINEL_CONTEXT, + LoggingContext, + LoggingContextOrSentinel, + current_context, +) +from synapse.types import ISynapseReactor from synapse.util.caches.ttlcache import TTLCache from tests import unittest -from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file +from tests.http import ( + TestServerTLSConnectionFactory, + dummy_address, + get_test_ca_cert_file, +) from tests.server import FakeTransport, ThreadedMemoryReactorClock from tests.utils import default_config @@ -54,15 +69,17 @@ logger = logging.getLogger(__name__) # Once Async Mocks or lambdas are supported this can go away. -def generate_resolve_service(result): - async def resolve_service(_): +def generate_resolve_service( + result: List[Server], +) -> Callable[[Any], Awaitable[List[Server]]]: + async def resolve_service(_: Any) -> List[Server]: return result return resolve_service class MatrixFederationAgentTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.mock_resolver = Mock() @@ -75,8 +92,12 @@ class MatrixFederationAgentTests(unittest.TestCase): self.tls_factory = FederationPolicyForHTTPS(config) - self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) - self.had_well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) + self.well_known_cache: TTLCache[bytes, Optional[bytes]] = TTLCache( + "test_cache", timer=self.reactor.seconds + ) + self.had_well_known_cache: TTLCache[bytes, bool] = TTLCache( + "test_cache", timer=self.reactor.seconds + ) self.well_known_resolver = WellKnownResolver( self.reactor, Agent(self.reactor, contextFactory=self.tls_factory), @@ -89,8 +110,8 @@ class MatrixFederationAgentTests(unittest.TestCase): self, client_factory: IProtocolFactory, ssl: bool = True, - expected_sni: bytes = None, - tls_sanlist: Optional[Iterable[bytes]] = None, + expected_sni: Optional[bytes] = None, + tls_sanlist: Optional[List[bytes]] = None, ) -> HTTPChannel: """Builds a test server, and completes the outgoing client connection Args: @@ -116,8 +137,8 @@ class MatrixFederationAgentTests(unittest.TestCase): if ssl: server_factory = _wrap_server_factory_for_tls(server_factory, tls_sanlist) - server_protocol = server_factory.buildProtocol(None) - + server_protocol = server_factory.buildProtocol(dummy_address) + assert server_protocol is not None # now, tell the client protocol factory to build the client protocol (it will be a # _WrappingProtocol, around a TLSMemoryBIOProtocol, around an # HTTP11ClientProtocol) and wire the output of said protocol up to the server via @@ -125,7 +146,8 @@ class MatrixFederationAgentTests(unittest.TestCase): # # Normally this would be done by the TCP socket code in Twisted, but we are # stubbing that out here. - client_protocol = client_factory.buildProtocol(None) + client_protocol = client_factory.buildProtocol(dummy_address) + assert isinstance(client_protocol, _WrappingProtocol) client_protocol.makeConnection( FakeTransport(server_protocol, self.reactor, client_protocol) ) @@ -136,6 +158,7 @@ class MatrixFederationAgentTests(unittest.TestCase): ) if ssl: + assert isinstance(server_protocol, TLSMemoryBIOProtocol) # fish the test server back out of the server-side TLS protocol. http_protocol = server_protocol.wrappedProtocol # grab a hold of the TLS connection, in case it gets torn down @@ -144,6 +167,7 @@ class MatrixFederationAgentTests(unittest.TestCase): http_protocol = server_protocol tls_connection = None + assert isinstance(http_protocol, HTTPChannel) # give the reactor a pump to get the TLS juices flowing (if needed) self.reactor.advance(0) @@ -159,12 +183,14 @@ class MatrixFederationAgentTests(unittest.TestCase): return http_protocol @defer.inlineCallbacks - def _make_get_request(self, uri: bytes): + def _make_get_request( + self, uri: bytes + ) -> Generator["Deferred[object]", object, IResponse]: """ Sends a simple GET request via the agent, and checks its logcontext management """ with LoggingContext("one") as context: - fetch_d = self.agent.request(b"GET", uri) + fetch_d: Deferred[IResponse] = self.agent.request(b"GET", uri) # Nothing happened yet self.assertNoResult(fetch_d) @@ -172,8 +198,9 @@ class MatrixFederationAgentTests(unittest.TestCase): # should have reset logcontext to the sentinel _check_logcontext(SENTINEL_CONTEXT) + fetch_res: IResponse try: - fetch_res = yield fetch_d + fetch_res = yield fetch_d # type: ignore[misc, assignment] return fetch_res except Exception as e: logger.info("Fetch of %s failed: %s", uri.decode("ascii"), e) @@ -216,7 +243,7 @@ class MatrixFederationAgentTests(unittest.TestCase): request: Request, content: bytes, headers: Optional[dict] = None, - ): + ) -> None: """Check that an incoming request looks like a valid .well-known request, and send back the response. """ @@ -237,16 +264,16 @@ class MatrixFederationAgentTests(unittest.TestCase): because it is created too early during setUp """ return MatrixFederationAgent( - reactor=self.reactor, + reactor=cast(ISynapseReactor, self.reactor), tls_client_options_factory=self.tls_factory, - user_agent="test-agent", # Note that this is unused since _well_known_resolver is provided. + user_agent=b"test-agent", # Note that this is unused since _well_known_resolver is provided. ip_whitelist=IPSet(), ip_blacklist=IPSet(), _srv_resolver=self.mock_resolver, _well_known_resolver=self.well_known_resolver, ) - def test_get(self): + def test_get(self) -> None: """happy-path test of a GET request with an explicit port""" self._do_get() @@ -254,11 +281,11 @@ class MatrixFederationAgentTests(unittest.TestCase): os.environ, {"https_proxy": "proxy.com", "no_proxy": "testserv"}, ) - def test_get_bypass_proxy(self): + def test_get_bypass_proxy(self) -> None: """test of a GET request with an explicit port and bypass proxy""" self._do_get() - def _do_get(self): + def _do_get(self) -> None: """test of a GET request with an explicit port""" self.agent = self._make_agent() @@ -318,7 +345,7 @@ class MatrixFederationAgentTests(unittest.TestCase): @patch.dict( os.environ, {"https_proxy": "http://proxy.com", "no_proxy": "unused.com"} ) - def test_get_via_http_proxy(self): + def test_get_via_http_proxy(self) -> None: """test for federation request through a http proxy""" self._do_get_via_proxy(expect_proxy_ssl=False, expected_auth_credentials=None) @@ -326,7 +353,7 @@ class MatrixFederationAgentTests(unittest.TestCase): os.environ, {"https_proxy": "http://user:pass@proxy.com", "no_proxy": "unused.com"}, ) - def test_get_via_http_proxy_with_auth(self): + def test_get_via_http_proxy_with_auth(self) -> None: """test for federation request through a http proxy with authentication""" self._do_get_via_proxy( expect_proxy_ssl=False, expected_auth_credentials=b"user:pass" @@ -335,7 +362,7 @@ class MatrixFederationAgentTests(unittest.TestCase): @patch.dict( os.environ, {"https_proxy": "https://proxy.com", "no_proxy": "unused.com"} ) - def test_get_via_https_proxy(self): + def test_get_via_https_proxy(self) -> None: """test for federation request through a https proxy""" self._do_get_via_proxy(expect_proxy_ssl=True, expected_auth_credentials=None) @@ -343,7 +370,7 @@ class MatrixFederationAgentTests(unittest.TestCase): os.environ, {"https_proxy": "https://user:pass@proxy.com", "no_proxy": "unused.com"}, ) - def test_get_via_https_proxy_with_auth(self): + def test_get_via_https_proxy_with_auth(self) -> None: """test for federation request through a https proxy with authentication""" self._do_get_via_proxy( expect_proxy_ssl=True, expected_auth_credentials=b"user:pass" @@ -353,7 +380,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self, expect_proxy_ssl: bool = False, expected_auth_credentials: Optional[bytes] = None, - ): + ) -> None: """Send a https federation request via an agent and check that it is correctly received at the proxy and client. The proxy can use either http or https. Args: @@ -418,10 +445,12 @@ class MatrixFederationAgentTests(unittest.TestCase): # now we make another test server to act as the upstream HTTP server. server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() - ).buildProtocol(None) + ).buildProtocol(dummy_address) + assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. proxy_server_transport = proxy_server.transport + assert proxy_server_transport is not None server_ssl_protocol.makeConnection(proxy_server_transport) # ... and replace the protocol on the proxy's transport with the @@ -451,6 +480,7 @@ class MatrixFederationAgentTests(unittest.TestCase): # now there should be a pending request http_server = server_ssl_protocol.wrappedProtocol + assert isinstance(http_server, HTTPChannel) self.assertEqual(len(http_server.requests), 1) request = http_server.requests[0] @@ -491,7 +521,7 @@ class MatrixFederationAgentTests(unittest.TestCase): json = self.successResultOf(treq.json_content(response)) self.assertEqual(json, {"a": 1}) - def test_get_ip_address(self): + def test_get_ip_address(self) -> None: """ Test the behaviour when the server name contains an explicit IP (with no port) """ @@ -526,7 +556,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_ipv6_address(self): + def test_get_ipv6_address(self) -> None: """ Test the behaviour when the server name contains an explicit IPv6 address (with no port) @@ -562,7 +592,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_ipv6_address_with_port(self): + def test_get_ipv6_address_with_port(self) -> None: """ Test the behaviour when the server name contains an explicit IPv6 address (with explicit port) @@ -598,7 +628,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_hostname_bad_cert(self): + def test_get_hostname_bad_cert(self) -> None: """ Test the behaviour when the certificate on the server doesn't match the hostname """ @@ -651,7 +681,7 @@ class MatrixFederationAgentTests(unittest.TestCase): failure_reason = e.value.reasons[0] self.assertIsInstance(failure_reason.value, VerificationError) - def test_get_ip_address_bad_cert(self): + def test_get_ip_address_bad_cert(self) -> None: """ Test the behaviour when the server name contains an explicit IP, but the server cert doesn't cover it @@ -684,7 +714,7 @@ class MatrixFederationAgentTests(unittest.TestCase): failure_reason = e.value.reasons[0] self.assertIsInstance(failure_reason.value, VerificationError) - def test_get_no_srv_no_well_known(self): + def test_get_no_srv_no_well_known(self) -> None: """ Test the behaviour when the server name has no port, no SRV, and no well-known """ @@ -740,7 +770,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_well_known(self): + def test_get_well_known(self) -> None: """Test the behaviour when the .well-known delegates elsewhere""" self.agent = self._make_agent() @@ -802,7 +832,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.well_known_cache.expire() self.assertNotIn(b"testserv", self.well_known_cache) - def test_get_well_known_redirect(self): + def test_get_well_known_redirect(self) -> None: """Test the behaviour when the server name has no port and no SRV record, but the .well-known has a 300 redirect """ @@ -892,7 +922,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.well_known_cache.expire() self.assertNotIn(b"testserv", self.well_known_cache) - def test_get_invalid_well_known(self): + def test_get_invalid_well_known(self) -> None: """ Test the behaviour when the server name has an *invalid* well-known (and no SRV) """ @@ -945,7 +975,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_well_known_unsigned_cert(self): + def test_get_well_known_unsigned_cert(self) -> None: """Test the behaviour when the .well-known server presents a cert not signed by a CA """ @@ -969,7 +999,7 @@ class MatrixFederationAgentTests(unittest.TestCase): ip_blacklist=IPSet(), _srv_resolver=self.mock_resolver, _well_known_resolver=WellKnownResolver( - self.reactor, + cast(ISynapseReactor, self.reactor), Agent(self.reactor, contextFactory=tls_factory), b"test-agent", well_known_cache=self.well_known_cache, @@ -999,7 +1029,7 @@ class MatrixFederationAgentTests(unittest.TestCase): b"_matrix._tcp.testserv" ) - def test_get_hostname_srv(self): + def test_get_hostname_srv(self) -> None: """ Test the behaviour when there is a single SRV record """ @@ -1041,7 +1071,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_well_known_srv(self): + def test_get_well_known_srv(self) -> None: """Test the behaviour when the .well-known redirects to a place where there is a SRV. """ @@ -1101,7 +1131,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_idna_servername(self): + def test_idna_servername(self) -> None: """test the behaviour when the server name has idna chars in""" self.agent = self._make_agent() @@ -1163,7 +1193,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_idna_srv_target(self): + def test_idna_srv_target(self) -> None: """test the behaviour when the target of a SRV record has idna chars""" self.agent = self._make_agent() @@ -1206,7 +1236,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_well_known_cache(self): + def test_well_known_cache(self) -> None: self.reactor.lookups["testserv"] = "1.2.3.4" fetch_d = defer.ensureDeferred( @@ -1262,7 +1292,7 @@ class MatrixFederationAgentTests(unittest.TestCase): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, b"other-server") - def test_well_known_cache_with_temp_failure(self): + def test_well_known_cache_with_temp_failure(self) -> None: """Test that we refetch well-known before the cache expires, and that it ignores transient errors. """ @@ -1341,7 +1371,7 @@ class MatrixFederationAgentTests(unittest.TestCase): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, None) - def test_well_known_too_large(self): + def test_well_known_too_large(self) -> None: """A well-known query that returns a result which is too large should be rejected.""" self.reactor.lookups["testserv"] = "1.2.3.4" @@ -1367,7 +1397,7 @@ class MatrixFederationAgentTests(unittest.TestCase): r = self.successResultOf(fetch_d) self.assertIsNone(r.delegated_server) - def test_srv_fallbacks(self): + def test_srv_fallbacks(self) -> None: """Test that other SRV results are tried if the first one fails.""" self.agent = self._make_agent() @@ -1427,7 +1457,7 @@ class MatrixFederationAgentTests(unittest.TestCase): class TestCachePeriodFromHeaders(unittest.TestCase): - def test_cache_control(self): + def test_cache_control(self) -> None: # uppercase self.assertEqual( _cache_period_from_headers( @@ -1464,7 +1494,7 @@ class TestCachePeriodFromHeaders(unittest.TestCase): 0, ) - def test_expires(self): + def test_expires(self) -> None: self.assertEqual( _cache_period_from_headers( Headers({b"Expires": [b"Wed, 30 Jan 2019 07:35:33 GMT"]}), @@ -1491,14 +1521,14 @@ class TestCachePeriodFromHeaders(unittest.TestCase): self.assertEqual(_cache_period_from_headers(Headers({b"Expires": [b"0"]})), 0) -def _check_logcontext(context): +def _check_logcontext(context: LoggingContextOrSentinel) -> None: current = current_context() if current is not context: raise AssertionError("Expected logcontext %s but was %s" % (context, current)) def _wrap_server_factory_for_tls( - factory: IProtocolFactory, sanlist: Iterable[bytes] = None + factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None ) -> IProtocolFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory The resultant factory will create a TLS server which presents a certificate @@ -1537,7 +1567,7 @@ def _get_test_protocol_factory() -> IProtocolFactory: return server_factory -def _log_request(request: str): +def _log_request(request: str) -> None: """Implements Factory.log, which is expected by Request.finish""" logger.info(f"Completed request {request}") @@ -1547,6 +1577,8 @@ class TrustingTLSPolicyForHTTPS: """An IPolicyForHTTPS which checks that the certificate belongs to the right server, but doesn't check the certificate chain.""" - def creatorForNetloc(self, hostname, port): + def creatorForNetloc( + self, hostname: bytes, port: int + ) -> IOpenSSLClientConnectionCreator: certificateOptions = OpenSSLCertificateOptions() return ClientTLSOptions(hostname, certificateOptions.getContext()) diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py index 77ce8432ac..7748f56ee6 100644 --- a/tests/http/federation/test_srv_resolver.py +++ b/tests/http/federation/test_srv_resolver.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from typing import Dict, Generator, List, Tuple, cast from unittest.mock import Mock from twisted.internet import defer @@ -20,7 +20,7 @@ from twisted.internet.defer import Deferred from twisted.internet.error import ConnectError from twisted.names import dns, error -from synapse.http.federation.srv_resolver import SrvResolver +from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.logging.context import LoggingContext, current_context from tests import unittest @@ -28,7 +28,7 @@ from tests.utils import MockClock class SrvResolverTestCase(unittest.TestCase): - def test_resolve(self): + def test_resolve(self) -> None: dns_client_mock = Mock() service_name = b"test_service.example.com" @@ -38,18 +38,19 @@ class SrvResolverTestCase(unittest.TestCase): type=dns.SRV, payload=dns.Record_SRV(target=host_name) ) - result_deferred = Deferred() + result_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred() dns_client_mock.lookupService.return_value = result_deferred - cache = {} + cache: Dict[bytes, List[Server]] = {} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) @defer.inlineCallbacks - def do_lookup(): + def do_lookup() -> Generator["Deferred[object]", object, List[Server]]: with LoggingContext("one") as ctx: resolve_d = resolver.resolve_service(service_name) - result = yield defer.ensureDeferred(resolve_d) + result: List[Server] + result = yield defer.ensureDeferred(resolve_d) # type: ignore[assignment] # should have restored our context self.assertIs(current_context(), ctx) @@ -70,7 +71,9 @@ class SrvResolverTestCase(unittest.TestCase): self.assertEqual(servers[0].host, host_name) @defer.inlineCallbacks - def test_from_cache_expired_and_dns_fail(self): + def test_from_cache_expired_and_dns_fail( + self, + ) -> Generator["Deferred[object]", object, None]: dns_client_mock = Mock() dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) @@ -81,10 +84,13 @@ class SrvResolverTestCase(unittest.TestCase): entry.priority = 0 entry.weight = 0 - cache = {service_name: [entry]} + cache = {service_name: [cast(Server, entry)]} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) - servers = yield defer.ensureDeferred(resolver.resolve_service(service_name)) + servers: List[Server] + servers = yield defer.ensureDeferred( + resolver.resolve_service(service_name) + ) # type: ignore[assignment] dns_client_mock.lookupService.assert_called_once_with(service_name) @@ -92,7 +98,7 @@ class SrvResolverTestCase(unittest.TestCase): self.assertEqual(servers, cache[service_name]) @defer.inlineCallbacks - def test_from_cache(self): + def test_from_cache(self) -> Generator["Deferred[object]", object, None]: clock = MockClock() dns_client_mock = Mock(spec_set=["lookupService"]) @@ -105,12 +111,15 @@ class SrvResolverTestCase(unittest.TestCase): entry.priority = 0 entry.weight = 0 - cache = {service_name: [entry]} + cache = {service_name: [cast(Server, entry)]} resolver = SrvResolver( dns_client=dns_client_mock, cache=cache, get_time=clock.time ) - servers = yield defer.ensureDeferred(resolver.resolve_service(service_name)) + servers: List[Server] + servers = yield defer.ensureDeferred( + resolver.resolve_service(service_name) + ) # type: ignore[assignment] self.assertFalse(dns_client_mock.lookupService.called) @@ -118,45 +127,48 @@ class SrvResolverTestCase(unittest.TestCase): self.assertEqual(servers, cache[service_name]) @defer.inlineCallbacks - def test_empty_cache(self): + def test_empty_cache(self) -> Generator["Deferred[object]", object, None]: dns_client_mock = Mock() dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) service_name = b"test_service.example.com" - cache = {} + cache: Dict[bytes, List[Server]] = {} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) with self.assertRaises(error.DNSServerError): yield defer.ensureDeferred(resolver.resolve_service(service_name)) @defer.inlineCallbacks - def test_name_error(self): + def test_name_error(self) -> Generator["Deferred[object]", object, None]: dns_client_mock = Mock() dns_client_mock.lookupService.return_value = defer.fail(error.DNSNameError()) service_name = b"test_service.example.com" - cache = {} + cache: Dict[bytes, List[Server]] = {} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) - servers = yield defer.ensureDeferred(resolver.resolve_service(service_name)) + servers: List[Server] + servers = yield defer.ensureDeferred( + resolver.resolve_service(service_name) + ) # type: ignore[assignment] self.assertEqual(len(servers), 0) self.assertEqual(len(cache), 0) - def test_disabled_service(self): + def test_disabled_service(self) -> None: """ test the behaviour when there is a single record which is ".". """ service_name = b"test_service.example.com" - lookup_deferred = Deferred() + lookup_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred() dns_client_mock = Mock() dns_client_mock.lookupService.return_value = lookup_deferred - cache = {} + cache: Dict[bytes, List[Server]] = {} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) # Old versions of Twisted don't have an ensureDeferred in failureResultOf. @@ -173,16 +185,16 @@ class SrvResolverTestCase(unittest.TestCase): self.failureResultOf(resolve_d, ConnectError) - def test_non_srv_answer(self): + def test_non_srv_answer(self) -> None: """ test the behaviour when the dns server gives us a spurious non-SRV response """ service_name = b"test_service.example.com" - lookup_deferred = Deferred() + lookup_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred() dns_client_mock = Mock() dns_client_mock.lookupService.return_value = lookup_deferred - cache = {} + cache: Dict[bytes, List[Server]] = {} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) # Old versions of Twisted don't have an ensureDeferred in successResultOf. diff --git a/tests/http/server/_base.py b/tests/http/server/_base.py index 5071f83574..36472e57a8 100644 --- a/tests/http/server/_base.py +++ b/tests/http/server/_base.py @@ -556,6 +556,6 @@ def _get_stack_frame_method_name(frame_info: inspect.FrameInfo) -> str: return method_name -def _hash_stack(stack: List[inspect.FrameInfo]): +def _hash_stack(stack: List[inspect.FrameInfo]) -> Tuple[str, ...]: """Turns a stack into a hashable value that can be put into a set.""" return tuple(_format_stack_frame(frame) for frame in stack) diff --git a/tests/http/test_additional_resource.py b/tests/http/test_additional_resource.py index 391196425c..ec6aacf235 100644 --- a/tests/http/test_additional_resource.py +++ b/tests/http/test_additional_resource.py @@ -11,28 +11,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any +from twisted.web.server import Request from synapse.http.additional_resource import AdditionalResource from synapse.http.server import respond_with_json +from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from tests.server import FakeSite, make_request from tests.unittest import HomeserverTestCase class _AsyncTestCustomEndpoint: - def __init__(self, config, module_api): + def __init__(self, config: JsonDict, module_api: Any) -> None: pass - async def handle_request(self, request): + async def handle_request(self, request: Request) -> None: + assert isinstance(request, SynapseRequest) respond_with_json(request, 200, {"some_key": "some_value_async"}) class _SyncTestCustomEndpoint: - def __init__(self, config, module_api): + def __init__(self, config: JsonDict, module_api: Any) -> None: pass - async def handle_request(self, request): + async def handle_request(self, request: Request) -> None: + assert isinstance(request, SynapseRequest) respond_with_json(request, 200, {"some_key": "some_value_sync"}) @@ -41,7 +47,7 @@ class AdditionalResourceTests(HomeserverTestCase): and async handlers. """ - def test_async(self): + def test_async(self) -> None: handler = _AsyncTestCustomEndpoint({}, None).handle_request resource = AdditionalResource(self.hs, handler) @@ -52,7 +58,7 @@ class AdditionalResourceTests(HomeserverTestCase): self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body, {"some_key": "some_value_async"}) - def test_sync(self): + def test_sync(self) -> None: handler = _SyncTestCustomEndpoint({}, None).handle_request resource = AdditionalResource(self.hs, handler) diff --git a/tests/http/test_client.py b/tests/http/test_client.py index 7e2f2a01cc..9cfe1ad0de 100644 --- a/tests/http/test_client.py +++ b/tests/http/test_client.py @@ -13,10 +13,12 @@ # limitations under the License. from io import BytesIO +from typing import Tuple, Union from unittest.mock import Mock from netaddr import IPSet +from twisted.internet.defer import Deferred from twisted.internet.error import DNSLookupError from twisted.python.failure import Failure from twisted.test.proto_helpers import AccumulatingProtocol @@ -28,6 +30,7 @@ from synapse.http.client import ( BlacklistingAgentWrapper, BlacklistingReactorWrapper, BodyExceededMaxSize, + _DiscardBodyWithMaxSizeProtocol, read_body_with_max_size, ) @@ -36,7 +39,9 @@ from tests.unittest import TestCase class ReadBodyWithMaxSizeTests(TestCase): - def _build_response(self, length=UNKNOWN_LENGTH): + def _build_response( + self, length: Union[int, str] = UNKNOWN_LENGTH + ) -> Tuple[BytesIO, "Deferred[int]", _DiscardBodyWithMaxSizeProtocol]: """Start reading the body, returns the response, result and proto""" response = Mock(length=length) result = BytesIO() @@ -48,23 +53,27 @@ class ReadBodyWithMaxSizeTests(TestCase): return result, deferred, protocol - def _assert_error(self, deferred, protocol): + def _assert_error( + self, deferred: "Deferred[int]", protocol: _DiscardBodyWithMaxSizeProtocol + ) -> None: """Ensure that the expected error is received.""" - self.assertIsInstance(deferred.result, Failure) + assert isinstance(deferred.result, Failure) self.assertIsInstance(deferred.result.value, BodyExceededMaxSize) - protocol.transport.abortConnection.assert_called_once() + assert protocol.transport is not None + # type-ignore: presumably abortConnection has been replaced with a Mock. + protocol.transport.abortConnection.assert_called_once() # type: ignore[attr-defined] - def _cleanup_error(self, deferred): + def _cleanup_error(self, deferred: "Deferred[int]") -> None: """Ensure that the error in the Deferred is handled gracefully.""" called = [False] - def errback(f): + def errback(f: Failure) -> None: called[0] = True deferred.addErrback(errback) self.assertTrue(called[0]) - def test_no_error(self): + def test_no_error(self) -> None: """A response that is NOT too large.""" result, deferred, protocol = self._build_response() @@ -76,7 +85,7 @@ class ReadBodyWithMaxSizeTests(TestCase): self.assertEqual(result.getvalue(), b"12345") self.assertEqual(deferred.result, 5) - def test_too_large(self): + def test_too_large(self) -> None: """A response which is too large raises an exception.""" result, deferred, protocol = self._build_response() @@ -87,7 +96,7 @@ class ReadBodyWithMaxSizeTests(TestCase): self._assert_error(deferred, protocol) self._cleanup_error(deferred) - def test_multiple_packets(self): + def test_multiple_packets(self) -> None: """Data should be accumulated through mutliple packets.""" result, deferred, protocol = self._build_response() @@ -100,7 +109,7 @@ class ReadBodyWithMaxSizeTests(TestCase): self.assertEqual(result.getvalue(), b"1234") self.assertEqual(deferred.result, 4) - def test_additional_data(self): + def test_additional_data(self) -> None: """A connection can receive data after being closed.""" result, deferred, protocol = self._build_response() @@ -115,7 +124,7 @@ class ReadBodyWithMaxSizeTests(TestCase): self._assert_error(deferred, protocol) self._cleanup_error(deferred) - def test_content_length(self): + def test_content_length(self) -> None: """The body shouldn't be read (at all) if the Content-Length header is too large.""" result, deferred, protocol = self._build_response(length=10) @@ -132,7 +141,7 @@ class ReadBodyWithMaxSizeTests(TestCase): class BlacklistingAgentTest(TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor, self.clock = get_clock() self.safe_domain, self.safe_ip = b"safe.test", b"1.2.3.4" @@ -151,7 +160,7 @@ class BlacklistingAgentTest(TestCase): self.ip_whitelist = IPSet([self.allowed_ip.decode()]) self.ip_blacklist = IPSet(["5.0.0.0/8"]) - def test_reactor(self): + def test_reactor(self) -> None: """Apply the blacklisting reactor and ensure it properly blocks connections to particular domains and IPs.""" agent = Agent( BlacklistingReactorWrapper( @@ -197,7 +206,7 @@ class BlacklistingAgentTest(TestCase): response = self.successResultOf(d) self.assertEqual(response.code, 200) - def test_agent(self): + def test_agent(self) -> None: """Apply the blacklisting agent and ensure it properly blocks connections to particular IPs.""" agent = BlacklistingAgentWrapper( Agent(self.reactor), diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py index a801f002a0..8c18e56881 100644 --- a/tests/http/test_endpoint.py +++ b/tests/http/test_endpoint.py @@ -17,7 +17,7 @@ from tests import unittest class ServerNameTestCase(unittest.TestCase): - def test_parse_server_name(self): + def test_parse_server_name(self) -> None: test_data = { "localhost": ("localhost", None), "my-example.com:1234": ("my-example.com", 1234), @@ -32,7 +32,7 @@ class ServerNameTestCase(unittest.TestCase): for i, o in test_data.items(): self.assertEqual(parse_server_name(i), o) - def test_validate_bad_server_names(self): + def test_validate_bad_server_names(self) -> None: test_data = [ "", # empty "localhost:http", # non-numeric port diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py index be9eaf34e8..fdd22a8e94 100644 --- a/tests/http/test_matrixfederationclient.py +++ b/tests/http/test_matrixfederationclient.py @@ -11,16 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from typing import Generator from unittest.mock import Mock from netaddr import IPSet from parameterized import parameterized from twisted.internet import defer -from twisted.internet.defer import TimeoutError +from twisted.internet.defer import Deferred, TimeoutError from twisted.internet.error import ConnectingCancelledError, DNSLookupError -from twisted.test.proto_helpers import StringTransport +from twisted.test.proto_helpers import MemoryReactor, StringTransport from twisted.web.client import ResponseNeverReceived from twisted.web.http import HTTPChannel @@ -30,34 +30,43 @@ from synapse.http.matrixfederationclient import ( MatrixFederationHttpClient, MatrixFederationRequest, ) -from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context +from synapse.logging.context import ( + SENTINEL_CONTEXT, + LoggingContext, + LoggingContextOrSentinel, + current_context, +) +from synapse.server import HomeServer +from synapse.util import Clock from tests.server import FakeTransport from tests.unittest import HomeserverTestCase -def check_logcontext(context): +def check_logcontext(context: LoggingContextOrSentinel) -> None: current = current_context() if current is not context: raise AssertionError("Expected logcontext %s but was %s" % (context, current)) class FederationClientTests(HomeserverTestCase): - def make_homeserver(self, reactor, clock): + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver(reactor=reactor, clock=clock) return hs - def prepare(self, reactor, clock, homeserver): + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: self.cl = MatrixFederationHttpClient(self.hs, None) self.reactor.lookups["testserv"] = "1.2.3.4" - def test_client_get(self): + def test_client_get(self) -> None: """ happy-path test of a GET request """ @defer.inlineCallbacks - def do_request(): + def do_request() -> Generator["Deferred[object]", object, object]: with LoggingContext("one") as context: fetch_d = defer.ensureDeferred( self.cl.get_json("testserv:8008", "foo/bar") @@ -119,7 +128,7 @@ class FederationClientTests(HomeserverTestCase): # check the response is as expected self.assertEqual(res, {"a": 1}) - def test_dns_error(self): + def test_dns_error(self) -> None: """ If the DNS lookup returns an error, it will bubble up. """ @@ -132,7 +141,7 @@ class FederationClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, DNSLookupError) - def test_client_connection_refused(self): + def test_client_connection_refused(self) -> None: d = defer.ensureDeferred( self.cl.get_json("testserv:8008", "foo/bar", timeout=10000) ) @@ -156,7 +165,7 @@ class FederationClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestSendFailed) self.assertIs(f.value.inner_exception, e) - def test_client_never_connect(self): + def test_client_never_connect(self) -> None: """ If the HTTP request is not connected and is timed out, it'll give a ConnectingCancelledError or TimeoutError. @@ -188,7 +197,7 @@ class FederationClientTests(HomeserverTestCase): f.value.inner_exception, (ConnectingCancelledError, TimeoutError) ) - def test_client_connect_no_response(self): + def test_client_connect_no_response(self) -> None: """ If the HTTP request is connected, but gets no response before being timed out, it'll give a ResponseNeverReceived. @@ -222,7 +231,7 @@ class FederationClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, ResponseNeverReceived) - def test_client_ip_range_blacklist(self): + def test_client_ip_range_blacklist(self) -> None: """Ensure that Synapse does not try to connect to blacklisted IPs""" # Set up the ip_range blacklist @@ -292,7 +301,7 @@ class FederationClientTests(HomeserverTestCase): f = self.failureResultOf(d, RequestSendFailed) self.assertIsInstance(f.value.inner_exception, ConnectingCancelledError) - def test_client_gets_headers(self): + def test_client_gets_headers(self) -> None: """ Once the client gets the headers, _request returns successfully. """ @@ -319,7 +328,7 @@ class FederationClientTests(HomeserverTestCase): self.assertEqual(r.code, 200) @parameterized.expand(["get_json", "post_json", "delete_json", "put_json"]) - def test_timeout_reading_body(self, method_name: str): + def test_timeout_reading_body(self, method_name: str) -> None: """ If the HTTP request is connected, but gets no response before being timed out, it'll give a RequestSendFailed with can_retry. @@ -351,7 +360,7 @@ class FederationClientTests(HomeserverTestCase): self.assertTrue(f.value.can_retry) self.assertIsInstance(f.value.inner_exception, defer.TimeoutError) - def test_client_requires_trailing_slashes(self): + def test_client_requires_trailing_slashes(self) -> None: """ If a connection is made to a client but the client rejects it due to requiring a trailing slash. We need to retry the request with a @@ -405,7 +414,7 @@ class FederationClientTests(HomeserverTestCase): r = self.successResultOf(d) self.assertEqual(r, {}) - def test_client_does_not_retry_on_400_plus(self): + def test_client_does_not_retry_on_400_plus(self) -> None: """ Another test for trailing slashes but now test that we don't retry on trailing slashes on a non-400/M_UNRECOGNIZED response. @@ -450,7 +459,7 @@ class FederationClientTests(HomeserverTestCase): # We should get a 404 failure response self.failureResultOf(d) - def test_client_sends_body(self): + def test_client_sends_body(self) -> None: defer.ensureDeferred( self.cl.post_json( "testserv:8008", "foo/bar", timeout=10000, data={"a": "b"} @@ -474,7 +483,7 @@ class FederationClientTests(HomeserverTestCase): content = request.content.read() self.assertEqual(content, b'{"a":"b"}') - def test_closes_connection(self): + def test_closes_connection(self) -> None: """Check that the client closes unused HTTP connections""" d = defer.ensureDeferred(self.cl.get_json("testserv:8008", "foo/bar")) @@ -514,7 +523,7 @@ class FederationClientTests(HomeserverTestCase): self.assertTrue(conn.disconnecting) @parameterized.expand([(b"",), (b"foo",), (b'{"a": Infinity}',)]) - def test_json_error(self, return_value): + def test_json_error(self, return_value: bytes) -> None: """ Test what happens if invalid JSON is returned from the remote endpoint. """ @@ -560,7 +569,7 @@ class FederationClientTests(HomeserverTestCase): f = self.failureResultOf(test_d) self.assertIsInstance(f.value, RequestSendFailed) - def test_too_big(self): + def test_too_big(self) -> None: """ Test what happens if a huge response is returned from the remote endpoint. """ diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index 2db77c6a73..a817940730 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -14,7 +14,7 @@ import base64 import logging import os -from typing import Iterable, Optional +from typing import List, Optional from unittest.mock import patch import treq @@ -22,7 +22,11 @@ from netaddr import IPSet from parameterized import parameterized from twisted.internet import interfaces # noqa: F401 -from twisted.internet.endpoints import HostnameEndpoint, _WrapperEndpoint +from twisted.internet.endpoints import ( + HostnameEndpoint, + _WrapperEndpoint, + _WrappingProtocol, +) from twisted.internet.interfaces import IProtocol, IProtocolFactory from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol @@ -32,7 +36,11 @@ from synapse.http.client import BlacklistingReactorWrapper from synapse.http.connectproxyclient import ProxyCredentials from synapse.http.proxyagent import ProxyAgent, parse_proxy -from tests.http import TestServerTLSConnectionFactory, get_test_https_policy +from tests.http import ( + TestServerTLSConnectionFactory, + dummy_address, + get_test_https_policy, +) from tests.server import FakeTransport, ThreadedMemoryReactorClock from tests.unittest import TestCase @@ -183,7 +191,7 @@ class ProxyParserTests(TestCase): expected_hostname: bytes, expected_port: int, expected_credentials: Optional[bytes], - ): + ) -> None: """ Tests that a given proxy URL will be broken into the components. Args: @@ -209,7 +217,7 @@ class ProxyParserTests(TestCase): class MatrixFederationAgentTests(TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() def _make_connection( @@ -218,7 +226,7 @@ class MatrixFederationAgentTests(TestCase): server_factory: IProtocolFactory, ssl: bool = False, expected_sni: Optional[bytes] = None, - tls_sanlist: Optional[Iterable[bytes]] = None, + tls_sanlist: Optional[List[bytes]] = None, ) -> IProtocol: """Builds a test server, and completes the outgoing client connection @@ -244,7 +252,8 @@ class MatrixFederationAgentTests(TestCase): if ssl: server_factory = _wrap_server_factory_for_tls(server_factory, tls_sanlist) - server_protocol = server_factory.buildProtocol(None) + server_protocol = server_factory.buildProtocol(dummy_address) + assert server_protocol is not None # now, tell the client protocol factory to build the client protocol, # and wire the output of said protocol up to the server via @@ -252,7 +261,8 @@ class MatrixFederationAgentTests(TestCase): # # Normally this would be done by the TCP socket code in Twisted, but we are # stubbing that out here. - client_protocol = client_factory.buildProtocol(None) + client_protocol = client_factory.buildProtocol(dummy_address) + assert client_protocol is not None client_protocol.makeConnection( FakeTransport(server_protocol, self.reactor, client_protocol) ) @@ -263,6 +273,7 @@ class MatrixFederationAgentTests(TestCase): ) if ssl: + assert isinstance(server_protocol, TLSMemoryBIOProtocol) http_protocol = server_protocol.wrappedProtocol tls_connection = server_protocol._tlsConnection else: @@ -288,7 +299,7 @@ class MatrixFederationAgentTests(TestCase): scheme: bytes, hostname: bytes, path: bytes, - ): + ) -> None: """Runs a test case for a direct connection not going through a proxy. Args: @@ -319,6 +330,7 @@ class MatrixFederationAgentTests(TestCase): ssl=is_https, expected_sni=hostname if is_https else None, ) + assert isinstance(http_server, HTTPChannel) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -339,34 +351,34 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") - def test_http_request(self): + def test_http_request(self) -> None: agent = ProxyAgent(self.reactor) self._test_request_direct_connection(agent, b"http", b"test.com", b"") - def test_https_request(self): + def test_https_request(self) -> None: agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy()) self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") - def test_http_request_use_proxy_empty_environment(self): + def test_http_request_use_proxy_empty_environment(self) -> None: agent = ProxyAgent(self.reactor, use_proxy=True) self._test_request_direct_connection(agent, b"http", b"test.com", b"") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"}) - def test_http_request_via_uppercase_no_proxy(self): + def test_http_request_via_uppercase_no_proxy(self) -> None: agent = ProxyAgent(self.reactor, use_proxy=True) self._test_request_direct_connection(agent, b"http", b"test.com", b"") @patch.dict( os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"} ) - def test_http_request_via_no_proxy(self): + def test_http_request_via_no_proxy(self) -> None: agent = ProxyAgent(self.reactor, use_proxy=True) self._test_request_direct_connection(agent, b"http", b"test.com", b"") @patch.dict( os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"} ) - def test_https_request_via_no_proxy(self): + def test_https_request_via_no_proxy(self) -> None: agent = ProxyAgent( self.reactor, contextFactory=get_test_https_policy(), @@ -375,12 +387,12 @@ class MatrixFederationAgentTests(TestCase): self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"}) - def test_http_request_via_no_proxy_star(self): + def test_http_request_via_no_proxy_star(self) -> None: agent = ProxyAgent(self.reactor, use_proxy=True) self._test_request_direct_connection(agent, b"http", b"test.com", b"") @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"}) - def test_https_request_via_no_proxy_star(self): + def test_https_request_via_no_proxy_star(self) -> None: agent = ProxyAgent( self.reactor, contextFactory=get_test_https_policy(), @@ -389,7 +401,7 @@ class MatrixFederationAgentTests(TestCase): self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"}) - def test_http_request_via_proxy(self): + def test_http_request_via_proxy(self) -> None: """ Tests that requests can be made through a proxy. """ @@ -401,7 +413,7 @@ class MatrixFederationAgentTests(TestCase): os.environ, {"http_proxy": "bob:pinkponies@proxy.com:8888", "no_proxy": "unused.com"}, ) - def test_http_request_via_proxy_with_auth(self): + def test_http_request_via_proxy_with_auth(self) -> None: """ Tests that authenticated requests can be made through a proxy. """ @@ -412,7 +424,7 @@ class MatrixFederationAgentTests(TestCase): @patch.dict( os.environ, {"http_proxy": "https://proxy.com:8888", "no_proxy": "unused.com"} ) - def test_http_request_via_https_proxy(self): + def test_http_request_via_https_proxy(self) -> None: self._do_http_request_via_proxy( expect_proxy_ssl=True, expected_auth_credentials=None ) @@ -424,13 +436,13 @@ class MatrixFederationAgentTests(TestCase): "no_proxy": "unused.com", }, ) - def test_http_request_via_https_proxy_with_auth(self): + def test_http_request_via_https_proxy_with_auth(self) -> None: self._do_http_request_via_proxy( expect_proxy_ssl=True, expected_auth_credentials=b"bob:pinkponies" ) @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"}) - def test_https_request_via_proxy(self): + def test_https_request_via_proxy(self) -> None: """Tests that TLS-encrypted requests can be made through a proxy""" self._do_https_request_via_proxy( expect_proxy_ssl=False, expected_auth_credentials=None @@ -440,7 +452,7 @@ class MatrixFederationAgentTests(TestCase): os.environ, {"https_proxy": "bob:pinkponies@proxy.com", "no_proxy": "unused.com"}, ) - def test_https_request_via_proxy_with_auth(self): + def test_https_request_via_proxy_with_auth(self) -> None: """Tests that authenticated, TLS-encrypted requests can be made through a proxy""" self._do_https_request_via_proxy( expect_proxy_ssl=False, expected_auth_credentials=b"bob:pinkponies" @@ -449,7 +461,7 @@ class MatrixFederationAgentTests(TestCase): @patch.dict( os.environ, {"https_proxy": "https://proxy.com", "no_proxy": "unused.com"} ) - def test_https_request_via_https_proxy(self): + def test_https_request_via_https_proxy(self) -> None: """Tests that TLS-encrypted requests can be made through a proxy""" self._do_https_request_via_proxy( expect_proxy_ssl=True, expected_auth_credentials=None @@ -459,7 +471,7 @@ class MatrixFederationAgentTests(TestCase): os.environ, {"https_proxy": "https://bob:pinkponies@proxy.com", "no_proxy": "unused.com"}, ) - def test_https_request_via_https_proxy_with_auth(self): + def test_https_request_via_https_proxy_with_auth(self) -> None: """Tests that authenticated, TLS-encrypted requests can be made through a proxy""" self._do_https_request_via_proxy( expect_proxy_ssl=True, expected_auth_credentials=b"bob:pinkponies" @@ -469,7 +481,7 @@ class MatrixFederationAgentTests(TestCase): self, expect_proxy_ssl: bool = False, expected_auth_credentials: Optional[bytes] = None, - ): + ) -> None: """Send a http request via an agent and check that it is correctly received at the proxy. The proxy can use either http or https. Args: @@ -501,6 +513,7 @@ class MatrixFederationAgentTests(TestCase): tls_sanlist=[b"DNS:proxy.com"] if expect_proxy_ssl else None, expected_sni=b"proxy.com" if expect_proxy_ssl else None, ) + assert isinstance(http_server, HTTPChannel) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -542,7 +555,7 @@ class MatrixFederationAgentTests(TestCase): self, expect_proxy_ssl: bool = False, expected_auth_credentials: Optional[bytes] = None, - ): + ) -> None: """Send a https request via an agent and check that it is correctly received at the proxy and client. The proxy can use either http or https. Args: @@ -606,10 +619,12 @@ class MatrixFederationAgentTests(TestCase): # now we make another test server to act as the upstream HTTP server. server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() - ).buildProtocol(None) + ).buildProtocol(dummy_address) + assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. proxy_server_transport = proxy_server.transport + assert proxy_server_transport is not None server_ssl_protocol.makeConnection(proxy_server_transport) # ... and replace the protocol on the proxy's transport with the @@ -644,6 +659,7 @@ class MatrixFederationAgentTests(TestCase): # now there should be a pending request http_server = server_ssl_protocol.wrappedProtocol + assert isinstance(http_server, HTTPChannel) self.assertEqual(len(http_server.requests), 1) request = http_server.requests[0] @@ -667,7 +683,7 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(body, b"result") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) - def test_http_request_via_proxy_with_blacklist(self): + def test_http_request_via_proxy_with_blacklist(self) -> None: # The blacklist includes the configured proxy IP. agent = ProxyAgent( BlacklistingReactorWrapper( @@ -691,6 +707,7 @@ class MatrixFederationAgentTests(TestCase): http_server = self._make_connection( client_factory, _get_test_protocol_factory() ) + assert isinstance(http_server, HTTPChannel) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -712,7 +729,7 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(body, b"result") @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"}) - def test_https_request_via_uppercase_proxy_with_blacklist(self): + def test_https_request_via_uppercase_proxy_with_blacklist(self) -> None: # The blacklist includes the configured proxy IP. agent = ProxyAgent( BlacklistingReactorWrapper( @@ -737,11 +754,15 @@ class MatrixFederationAgentTests(TestCase): proxy_server = self._make_connection( client_factory, _get_test_protocol_factory() ) + assert isinstance(proxy_server, HTTPChannel) # fish the transports back out so that we can do the old switcheroo s2c_transport = proxy_server.transport + assert isinstance(s2c_transport, FakeTransport) client_protocol = s2c_transport.other + assert isinstance(client_protocol, _WrappingProtocol) c2s_transport = client_protocol.transport + assert isinstance(c2s_transport, FakeTransport) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -762,8 +783,10 @@ class MatrixFederationAgentTests(TestCase): # now we can replace the proxy channel with a new, SSL-wrapped HTTP channel ssl_factory = _wrap_server_factory_for_tls(_get_test_protocol_factory()) - ssl_protocol = ssl_factory.buildProtocol(None) + ssl_protocol = ssl_factory.buildProtocol(dummy_address) + assert isinstance(ssl_protocol, TLSMemoryBIOProtocol) http_server = ssl_protocol.wrappedProtocol + assert isinstance(http_server, HTTPChannel) ssl_protocol.makeConnection( FakeTransport(client_protocol, self.reactor, ssl_protocol) @@ -797,28 +820,28 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(body, b"result") @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) - def test_proxy_with_no_scheme(self): + def test_proxy_with_no_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) + assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) @patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"}) - def test_proxy_with_unsupported_scheme(self): + def test_proxy_with_unsupported_scheme(self) -> None: with self.assertRaises(ValueError): ProxyAgent(self.reactor, use_proxy=True) @patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"}) - def test_proxy_with_http_scheme(self): + def test_proxy_with_http_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) + assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) @patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"}) - def test_proxy_with_https_scheme(self): + def test_proxy_with_https_scheme(self) -> None: https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - self.assertIsInstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint) + assert isinstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint) self.assertEqual( https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com" ) @@ -828,7 +851,7 @@ class MatrixFederationAgentTests(TestCase): def _wrap_server_factory_for_tls( - factory: IProtocolFactory, sanlist: Iterable[bytes] = None + factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None ) -> IProtocolFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory @@ -865,6 +888,6 @@ def _get_test_protocol_factory() -> IProtocolFactory: return server_factory -def _log_request(request: str): +def _log_request(request: str) -> None: """Implements Factory.log, which is expected by Request.finish""" logger.info(f"Completed request {request}") diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index 46166292fe..c8d215b6dc 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -14,7 +14,7 @@ import json from http import HTTPStatus from io import BytesIO -from typing import Tuple +from typing import Tuple, Union from unittest.mock import Mock from synapse.api.errors import Codes, SynapseError @@ -33,7 +33,7 @@ from tests import unittest from tests.http.server._base import test_disconnect -def make_request(content): +def make_request(content: Union[bytes, JsonDict]) -> Mock: """Make an object that acts enough like a request.""" request = Mock(spec=["method", "uri", "content"]) @@ -47,7 +47,7 @@ def make_request(content): class TestServletUtils(unittest.TestCase): - def test_parse_json_value(self): + def test_parse_json_value(self) -> None: """Basic tests for parse_json_value_from_request.""" # Test round-tripping. obj = {"foo": 1} @@ -78,7 +78,7 @@ class TestServletUtils(unittest.TestCase): with self.assertRaises(SynapseError): parse_json_value_from_request(make_request(b'{"foo": Infinity}')) - def test_parse_json_object(self): + def test_parse_json_object(self) -> None: """Basic tests for parse_json_object_from_request.""" # Test empty. result = parse_json_object_from_request( diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py index c85a3665c1..010601da4b 100644 --- a/tests/http/test_simple_client.py +++ b/tests/http/test_simple_client.py @@ -17,22 +17,24 @@ from netaddr import IPSet from twisted.internet import defer from twisted.internet.error import DNSLookupError +from twisted.test.proto_helpers import MemoryReactor from synapse.http import RequestTimedOutError from synapse.http.client import SimpleHttpClient from synapse.server import HomeServer +from synapse.util import Clock from tests.unittest import HomeserverTestCase class SimpleHttpClientTests(HomeserverTestCase): - def prepare(self, reactor, clock, hs: "HomeServer"): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: "HomeServer") -> None: # Add a DNS entry for a test server self.reactor.lookups["testserv"] = "1.2.3.4" self.cl = hs.get_simple_http_client() - def test_dns_error(self): + def test_dns_error(self) -> None: """ If the DNS lookup returns an error, it will bubble up. """ @@ -42,7 +44,7 @@ class SimpleHttpClientTests(HomeserverTestCase): f = self.failureResultOf(d) self.assertIsInstance(f.value, DNSLookupError) - def test_client_connection_refused(self): + def test_client_connection_refused(self) -> None: d = defer.ensureDeferred(self.cl.get_json("http://testserv:8008/foo/bar")) self.pump() @@ -63,7 +65,7 @@ class SimpleHttpClientTests(HomeserverTestCase): self.assertIs(f.value, e) - def test_client_never_connect(self): + def test_client_never_connect(self) -> None: """ If the HTTP request is not connected and is timed out, it'll give a ConnectingCancelledError or TimeoutError. @@ -90,7 +92,7 @@ class SimpleHttpClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestTimedOutError) - def test_client_connect_no_response(self): + def test_client_connect_no_response(self) -> None: """ If the HTTP request is connected, but gets no response before being timed out, it'll give a ResponseNeverReceived. @@ -121,7 +123,7 @@ class SimpleHttpClientTests(HomeserverTestCase): self.assertIsInstance(f.value, RequestTimedOutError) - def test_client_ip_range_blacklist(self): + def test_client_ip_range_blacklist(self) -> None: """Ensure that Synapse does not try to connect to blacklisted IPs""" # Add some DNS entries we'll blacklist diff --git a/tests/http/test_site.py b/tests/http/test_site.py index b2dbf76d33..9a78fede92 100644 --- a/tests/http/test_site.py +++ b/tests/http/test_site.py @@ -13,18 +13,20 @@ # limitations under the License. from twisted.internet.address import IPv6Address -from twisted.test.proto_helpers import StringTransport +from twisted.test.proto_helpers import MemoryReactor, StringTransport from synapse.app.homeserver import SynapseHomeServer +from synapse.server import HomeServer +from synapse.util import Clock from tests.unittest import HomeserverTestCase class SynapseRequestTestCase(HomeserverTestCase): - def make_homeserver(self, reactor, clock): + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: return self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer) - def test_large_request(self): + def test_large_request(self) -> None: """overlarge HTTP requests should be rejected""" self.hs.start_listening() diff --git a/tests/server.py b/tests/server.py index b1730fcc8d..237bcad8ba 100644 --- a/tests/server.py +++ b/tests/server.py @@ -70,7 +70,7 @@ from synapse.logging.context import ContextResourceUsage from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.engines import PostgresEngine, create_engine -from synapse.types import JsonDict +from synapse.types import ISynapseReactor, JsonDict from synapse.util import Clock from tests.utils import ( @@ -401,7 +401,9 @@ def make_request( return channel -@implementer(IReactorPluggableNameResolver) +# ISynapseReactor implies IReactorPluggableNameResolver, but explicitly +# marking this as an implementer of the latter seems to keep mypy-zope happier. +@implementer(IReactorPluggableNameResolver, ISynapseReactor) class ThreadedMemoryReactorClock(MemoryReactorClock): """ A MemoryReactorClock that supports callFromThread. -- cgit 1.5.1 From 5b55c32d610b2baec8622f0418519b130ab4fa30 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 7 Feb 2023 06:56:09 -0500 Subject: Add tests for using _flatten_dict with an event. (#15002) --- changelog.d/15002.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 13 +++---- tests/push/test_push_rule_evaluator.py | 63 +++++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 changelog.d/15002.misc (limited to 'synapse') diff --git a/changelog.d/15002.misc b/changelog.d/15002.misc new file mode 100644 index 0000000000..68ac8335fc --- /dev/null +++ b/changelog.d/15002.misc @@ -0,0 +1 @@ +Add tests for `_flatten_dict`. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f73dceb128..d9c0a98f44 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -36,7 +36,7 @@ from synapse.api.constants import ( Membership, RelationTypes, ) -from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion +from synapse.api.room_versions import PushRuleRoomFlag from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -405,7 +405,7 @@ class BulkPushRuleEvaluator: room_mention = mentions.get("room") is True evaluator = PushRuleEvaluator( - _flatten_dict(event, room_version=event.room_version), + _flatten_dict(event), has_mentions, user_mentions, room_mention, @@ -491,7 +491,6 @@ StateGroup = Union[object, int] def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], - room_version: Optional[RoomVersion] = None, prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, ) -> Dict[str, str]: @@ -511,7 +510,6 @@ def _flatten_dict( Args: d: The event or content to continue flattening. - room_version: The room version object. prefix: The key prefix (from outer dictionaries). result: The result to mutate. @@ -531,14 +529,13 @@ def _flatten_dict( # `room_version` should only ever be set when looking at the top level of an event if ( - room_version is not None - and PushRuleRoomFlag.EXTENSIBLE_EVENTS in room_version.msc3931_push_features - and isinstance(d, EventBase) + isinstance(d, EventBase) + and PushRuleRoomFlag.EXTENSIBLE_EVENTS in d.room_version.msc3931_push_features ): # Room supports extensible events: replace `content.body` with the plain text # representation from `m.markup`, as per MSC1767. markup = d.get("content").get("m.markup") - if room_version.identifier.startswith("org.matrix.msc1767."): + if d.room_version.identifier.startswith("org.matrix.msc1767."): markup = d.get("content").get("org.matrix.msc1767.markup") if markup is not None and isinstance(markup, list): text = "" diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 7c430c4ecb..da33423871 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -22,7 +22,7 @@ import synapse.rest.admin from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.api.room_versions import RoomVersions from synapse.appservice import ApplicationService -from synapse.events import FrozenEvent +from synapse.events import FrozenEvent, make_event_from_dict from synapse.push.bulk_push_rule_evaluator import _flatten_dict from synapse.push.httppusher import tweaks_for_actions from synapse.rest import admin @@ -60,6 +60,67 @@ class FlattenDictTestCase(unittest.TestCase): } self.assertEqual({"woo": "woo"}, _flatten_dict(input)) + def test_event(self) -> None: + """Events can also be flattened.""" + event = make_event_from_dict( + { + "room_id": "!test:test", + "type": "m.room.message", + "sender": "@alice:test", + "content": { + "msgtype": "m.text", + "body": "Hello world!", + "format": "org.matrix.custom.html", + "formatted_body": "

Hello world!

", + }, + }, + room_version=RoomVersions.V8, + ) + expected = { + "content.msgtype": "m.text", + "content.body": "hello world!", + "content.format": "org.matrix.custom.html", + "content.formatted_body": "

hello world!

", + "room_id": "!test:test", + "sender": "@alice:test", + "type": "m.room.message", + } + self.assertEqual(expected, _flatten_dict(event)) + + def test_extensible_events(self) -> None: + """Extensible events has compatibility behaviour.""" + event_dict = { + "room_id": "!test:test", + "type": "m.room.message", + "sender": "@alice:test", + "content": { + "org.matrix.msc1767.markup": [ + {"mimetype": "text/plain", "body": "Hello world!"}, + {"mimetype": "text/html", "body": "

Hello world!

"}, + ] + }, + } + + # For a current room version, there's no special behavior. + event = make_event_from_dict(event_dict, room_version=RoomVersions.V8) + expected = { + "room_id": "!test:test", + "sender": "@alice:test", + "type": "m.room.message", + } + self.assertEqual(expected, _flatten_dict(event)) + + # For a room version with extensible events, they parse out the text/plain + # to a content.body property. + event = make_event_from_dict(event_dict, room_version=RoomVersions.MSC1767v10) + expected = { + "content.body": "hello world!", + "room_id": "!test:test", + "sender": "@alice:test", + "type": "m.room.message", + } + self.assertEqual(expected, _flatten_dict(event)) + class PushRuleEvaluatorTestCase(unittest.TestCase): def _get_evaluator( -- cgit 1.5.1 From 2dff93099b5aa7e213da76a9c4b3de84385b58e1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 7 Feb 2023 15:24:44 +0000 Subject: Typecheck tests.rest.media.v1.test_media_storage (#15008) * Fix MediaStorage type hint * Typecheck tests.rest.media.v1.test_media_storage * Changelog * Remove assert and make the comment succinct * Fix syntax for olddeps --- changelog.d/15008.misc | 1 + mypy.ini | 1 - synapse/rest/media/v1/media_storage.py | 7 ++--- tests/rest/media/v1/test_media_storage.py | 49 +++++++++++++++++++------------ 4 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 changelog.d/15008.misc (limited to 'synapse') diff --git a/changelog.d/15008.misc b/changelog.d/15008.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15008.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/mypy.ini b/mypy.ini index 0efafb26b6..4598002c4a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -33,7 +33,6 @@ exclude = (?x) |synapse/storage/schema/ |tests/module_api/test_api.py - |tests/rest/media/v1/test_media_storage.py |tests/server.py )$ diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index a5c3de192f..db25848744 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -46,10 +46,9 @@ from ._base import FileInfo, Responder from .filepath import MediaFilePaths if TYPE_CHECKING: + from synapse.rest.media.v1.storage_provider import StorageProvider from synapse.server import HomeServer - from .storage_provider import StorageProviderWrapper - logger = logging.getLogger(__name__) @@ -68,7 +67,7 @@ class MediaStorage: hs: "HomeServer", local_media_directory: str, filepaths: MediaFilePaths, - storage_providers: Sequence["StorageProviderWrapper"], + storage_providers: Sequence["StorageProvider"], ): self.hs = hs self.reactor = hs.get_reactor() @@ -360,7 +359,7 @@ class ReadableFileWrapper: clock: Clock path: str - async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None: + async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None: """Reads the file in chunks and calls the callback with each chunk.""" with open(self.path, "rb") as file: diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index d18fc13c21..17a3b06a8e 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -16,7 +16,7 @@ import shutil import tempfile from binascii import unhexlify from io import BytesIO -from typing import Any, BinaryIO, Dict, List, Optional, Union +from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union from unittest.mock import Mock from urllib import parse @@ -32,6 +32,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.errors import Codes from synapse.events import EventBase from synapse.events.spamcheck import load_legacy_spam_checkers +from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable from synapse.module_api import ModuleApi from synapse.rest import admin @@ -41,7 +42,7 @@ from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.media_storage import MediaStorage, ReadableFileWrapper from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend from synapse.server import HomeServer -from synapse.types import RoomAlias +from synapse.types import JsonDict, RoomAlias from synapse.util import Clock from tests import unittest @@ -201,36 +202,46 @@ class _TestImage: ], ) class MediaRepoTests(unittest.HomeserverTestCase): - + test_image: ClassVar[_TestImage] hijack_auth = True user_id = "@test:user" def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - self.fetches = [] + self.fetches: List[ + Tuple[ + "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]", + str, + str, + Optional[QueryParams], + ] + ] = [] def get_file( destination: str, path: str, output_stream: BinaryIO, - args: Optional[Dict[str, Union[str, List[str]]]] = None, + args: Optional[QueryParams] = None, + retry_on_dns_fail: bool = True, max_size: Optional[int] = None, - ) -> Deferred: - """ - Returns tuple[int,dict,str,int] of file length, response headers, - absolute URI, and response code. - """ + ignore_backoff: bool = False, + ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]": + """A mock for MatrixFederationHttpClient.get_file.""" - def write_to(r): + def write_to( + r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]] + ) -> Tuple[int, Dict[bytes, List[bytes]]]: data, response = r output_stream.write(data) return response - d = Deferred() - d.addCallback(write_to) + d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred() self.fetches.append((d, destination, path, args)) - return make_deferred_yieldable(d) + # Note that this callback changes the value held by d. + d_after_callback = d.addCallback(write_to) + return make_deferred_yieldable(d_after_callback) + # Mock out the homeserver's MatrixFederationHttpClient client = Mock() client.get_file = get_file @@ -461,6 +472,7 @@ class MediaRepoTests(unittest.HomeserverTestCase): # Synapse should regenerate missing thumbnails. origin, media_id = self.media_id.split("/") info = self.get_success(self.store.get_cached_remote_media(origin, media_id)) + assert info is not None file_id = info["filesystem_id"] thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir( @@ -581,7 +593,7 @@ class MediaRepoTests(unittest.HomeserverTestCase): "thumbnail_method": method, "thumbnail_type": self.test_image.content_type, "thumbnail_length": 256, - "filesystem_id": f"thumbnail1{self.test_image.extension}", + "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}", }, { "thumbnail_width": 32, @@ -589,10 +601,10 @@ class MediaRepoTests(unittest.HomeserverTestCase): "thumbnail_method": method, "thumbnail_type": self.test_image.content_type, "thumbnail_length": 256, - "filesystem_id": f"thumbnail2{self.test_image.extension}", + "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}", }, ], - file_id=f"image{self.test_image.extension}", + file_id=f"image{self.test_image.extension.decode()}", url_cache=None, server_name=None, ) @@ -637,6 +649,7 @@ class TestSpamCheckerLegacy: self.config = config self.api = api + @staticmethod def parse_config(config: Dict[str, Any]) -> Dict[str, Any]: return config @@ -748,7 +761,7 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): async def check_media_file_for_spam( self, file_wrapper: ReadableFileWrapper, file_info: FileInfo - ) -> Union[Codes, Literal["NOT_SPAM"]]: + ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]: buf = BytesIO() await file_wrapper.write_chunks_to(buf.write) -- cgit 1.5.1 From 9cd7610f86ab5051c9365dd38d1eec405a5f8ca6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 7 Feb 2023 15:26:55 +0000 Subject: Revert "Add `event_stream_ordering` column to membership state tables (#14979)" This reverts commit 5fdc12f482c68e2cdbb78d7db5de2cfe621720d4. --- synapse/storage/databases/main/events.py | 23 ++--- .../storage/databases/main/events_bg_updates.py | 104 +-------------------- synapse/storage/databases/main/events_worker.py | 8 +- .../26membership_tables_event_stream_ordering.sql | 21 ----- 4 files changed, 11 insertions(+), 145 deletions(-) delete mode 100644 synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql (limited to 'synapse') diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b6cce0a7cc..1536937b67 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1147,15 +1147,11 @@ class PersistEventsStore: # been inserted into room_memberships. txn.execute_batch( """INSERT INTO current_state_events - (room_id, type, state_key, event_id, membership, event_stream_ordering) - VALUES ( - ?, ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) - ) + (room_id, type, state_key, event_id, membership) + VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?)) """, [ - (room_id, key[0], key[1], ev_id, ev_id, ev_id) + (room_id, key[0], key[1], ev_id, ev_id) for key, ev_id in to_insert.items() ], ) @@ -1182,15 +1178,11 @@ class PersistEventsStore: if to_insert: txn.execute_batch( """INSERT INTO local_current_membership - (room_id, user_id, event_id, membership, event_stream_ordering) - VALUES ( - ?, ?, ?, - (SELECT membership FROM room_memberships WHERE event_id = ?), - (SELECT stream_ordering FROM events WHERE event_id = ?) - ) + (room_id, user_id, event_id, membership) + VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?)) """, [ - (room_id, key[1], ev_id, ev_id, ev_id) + (room_id, key[1], ev_id, ev_id) for key, ev_id in to_insert.items() if key[0] == EventTypes.Member and self.is_mine_id(key[1]) ], @@ -1798,7 +1790,6 @@ class PersistEventsStore: table="room_memberships", keys=( "event_id", - "event_stream_ordering", "user_id", "sender", "room_id", @@ -1809,7 +1800,6 @@ class PersistEventsStore: values=[ ( event.event_id, - event.internal_metadata.stream_ordering, event.state_key, event.user_id, event.room_id, @@ -1842,7 +1832,6 @@ class PersistEventsStore: keyvalues={"room_id": event.room_id, "user_id": event.state_key}, values={ "event_id": event.event_id, - "event_stream_ordering": event.internal_metadata.stream_ordering, "membership": event.membership, }, ) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 0e81d38cca..b9d3c36d60 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, ca import attr -from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.constants import EventContentFields, RelationTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -71,10 +71,6 @@ class _BackgroundUpdates: EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index" - POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING = ( - "populate_membership_event_stream_ordering" - ) - @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -103,10 +99,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ): super().__init__(database, db_conn, hs) - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING, - self._populate_membership_event_stream_ordering, - ) self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts, @@ -1506,97 +1498,3 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) return batch_size - - async def _populate_membership_event_stream_ordering( - self, progress: JsonDict, batch_size: int - ) -> int: - def _populate_membership_event_stream_ordering( - txn: LoggingTransaction, - ) -> bool: - - if "max_stream_ordering" in progress: - max_stream_ordering = progress["max_stream_ordering"] - else: - txn.execute("SELECT max(stream_ordering) FROM events") - res = txn.fetchone() - if res is None or res[0] is None: - return True - else: - max_stream_ordering = res[0] - - start = progress.get("stream_ordering", 0) - stop = start + batch_size - - sql = f""" - SELECT room_id, event_id, stream_ordering - FROM events - WHERE - type = '{EventTypes.Member}' - AND stream_ordering >= ? - AND stream_ordering < ? - """ - txn.execute(sql, (start, stop)) - - rows: List[Tuple[str, str, int]] = cast( - List[Tuple[str, str, int]], txn.fetchall() - ) - - event_ids: List[Tuple[str]] = [] - event_stream_orderings: List[Tuple[int]] = [] - - for _, event_id, event_stream_ordering in rows: - event_ids.append((event_id,)) - event_stream_orderings.append((event_stream_ordering,)) - - self.db_pool.simple_update_many_txn( - txn, - table="current_state_events", - key_names=("event_id",), - key_values=event_ids, - value_names=("event_stream_ordering",), - value_values=event_stream_orderings, - ) - - self.db_pool.simple_update_many_txn( - txn, - table="room_memberships", - key_names=("event_id",), - key_values=event_ids, - value_names=("event_stream_ordering",), - value_values=event_stream_orderings, - ) - - # NOTE: local_current_membership has no index on event_id, so only - # the room ID here will reduce the query rows read. - for room_id, event_id, event_stream_ordering in rows: - txn.execute( - """ - UPDATE local_current_membership - SET event_stream_ordering = ? - WHERE room_id = ? AND event_id = ? - """, - (event_stream_ordering, room_id, event_id), - ) - - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING, - { - "stream_ordering": stop, - "max_stream_ordering": max_stream_ordering, - }, - ) - - return stop > max_stream_ordering - - finished = await self.db_pool.runInteraction( - "_populate_membership_event_stream_ordering", - _populate_membership_event_stream_ordering, - ) - - if finished: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING - ) - - return batch_size diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 6d0ef10258..d7d08369ca 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore): txn: LoggingTransaction, ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( - "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type," + "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," " e.outlier" " FROM events AS e" @@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore): " LEFT JOIN event_relations USING (event_id)" " LEFT JOIN room_memberships USING (event_id)" " LEFT JOIN rejections USING (event_id)" - " WHERE ? < out.event_stream_ordering" - " AND out.event_stream_ordering <= ?" + " WHERE ? < event_stream_ordering" + " AND event_stream_ordering <= ?" " AND out.instance_name = ?" - " ORDER BY out.event_stream_ordering ASC" + " ORDER BY event_stream_ordering ASC" ) txn.execute(sql, (last_id, current_id, instance_name)) diff --git a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql deleted file mode 100644 index 7c30a67fc4..0000000000 --- a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2022 Beeper - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT; -ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT; -ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT; - -INSERT INTO background_updates (update_name, progress_json) VALUES - ('populate_membership_event_stream_ordering', '{}'); -- cgit 1.5.1 From f10caa73eee0caa91cf373966104d1ededae2aee Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 7 Feb 2023 15:33:33 +0000 Subject: Disambiguate `get_ex_outlier_stream_rows` query A backwards-compatible piece of #14979 that's safe to land now. --- synapse/storage/databases/main/events_worker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index d7d08369ca..6d0ef10258 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore): txn: LoggingTransaction, ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( - "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," + "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type," " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," " e.outlier" " FROM events AS e" @@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore): " LEFT JOIN event_relations USING (event_id)" " LEFT JOIN room_memberships USING (event_id)" " LEFT JOIN rejections USING (event_id)" - " WHERE ? < event_stream_ordering" - " AND event_stream_ordering <= ?" + " WHERE ? < out.event_stream_ordering" + " AND out.event_stream_ordering <= ?" " AND out.instance_name = ?" - " ORDER BY event_stream_ordering ASC" + " ORDER BY out.event_stream_ordering ASC" ) txn.execute(sql, (last_id, current_id, instance_name)) -- cgit 1.5.1 From c78c67c5a909c6749f25b251d46be3df8f56f8c2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Feb 2023 17:41:55 +0100 Subject: Fix bug in replication where response is cached (#15024) --- changelog.d/15024.bugfix | 1 + synapse/replication/http/_base.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/15024.bugfix (limited to 'synapse') diff --git a/changelog.d/15024.bugfix b/changelog.d/15024.bugfix new file mode 100644 index 0000000000..dddd406322 --- /dev/null +++ b/changelog.d/15024.bugfix @@ -0,0 +1 @@ +Fix bug where retried replication requests would return a failure. Introduced in v1.76.0. diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 908f3f1db7..c20d9c7e9d 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -426,6 +426,8 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): code, response = await self.response_cache.wrap( txn_id, self._handle_request, request, content, **kwargs ) + # Take a copy so we don't mutate things in the cache. + response = dict(response) else: # The `@cancellable` decorator may be applied to `_handle_request`. But we # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`, -- cgit 1.5.1 From c951fbedcb81895c199c1f4cfe2251d6c3a7b5f4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 8 Feb 2023 13:09:41 -0500 Subject: MSC3873: Escape keys when flattening dicts. (#15004) This disambiguates keys which attempt to match fields with a dot in them (e.g. m.relates_to). Disabled by default behind an experimental configuration flag. --- changelog.d/15004.feature | 1 + synapse/config/experimental.py | 5 +++++ synapse/push/bulk_push_rule_evaluator.py | 30 ++++++++++++++++++++++++++---- tests/push/test_push_rule_evaluator.py | 8 ++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 changelog.d/15004.feature (limited to 'synapse') diff --git a/changelog.d/15004.feature b/changelog.d/15004.feature new file mode 100644 index 0000000000..d11d0aca91 --- /dev/null +++ b/changelog.d/15004.feature @@ -0,0 +1 @@ +Implement [MSC3873](https://github.com/matrix-org/matrix-spec-proposals/pull/3873) to unambiguate push rule keys with dots in them. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 53c0682dfd..5e3a889081 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -169,6 +169,11 @@ class ExperimentalConfig(Config): # MSC3925: do not replace events with their edits self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False) + # MSC3873: Disambiguate event_match keys. + self.msc3783_escape_event_match_key = experimental.get( + "msc3783_escape_event_match_key", False + ) + # MSC3952: Intentional mentions self.msc3952_intentional_mentions = experimental.get( "msc3952_intentional_mentions", False diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d9c0a98f44..39d2f88f03 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -271,7 +271,10 @@ class BulkPushRuleEvaluator: related_event_id, allow_none=True ) if related_event is not None: - related_events[relation_type] = _flatten_dict(related_event) + related_events[relation_type] = _flatten_dict( + related_event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ) reply_event_id = ( event.content.get("m.relates_to", {}) @@ -286,7 +289,10 @@ class BulkPushRuleEvaluator: ) if related_event is not None: - related_events["m.in_reply_to"] = _flatten_dict(related_event) + related_events["m.in_reply_to"] = _flatten_dict( + related_event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ) # indicate that this is from a fallback relation. if relation_type == "m.thread" and event.content.get( @@ -405,7 +411,10 @@ class BulkPushRuleEvaluator: room_mention = mentions.get("room") is True evaluator = PushRuleEvaluator( - _flatten_dict(event), + _flatten_dict( + event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ), has_mentions, user_mentions, room_mention, @@ -493,6 +502,8 @@ def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, + *, + msc3783_escape_event_match_key: bool = False, ) -> Dict[str, str]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, @@ -521,11 +532,22 @@ def _flatten_dict( if result is None: result = {} for key, value in d.items(): + if msc3783_escape_event_match_key: + # Escape periods in the key with a backslash (and backslashes with an + # extra backslash). This is since a period is used as a separator between + # nested fields. + key = key.replace("\\", "\\\\").replace(".", "\\.") + if isinstance(value, str): result[".".join(prefix + [key])] = value.lower() elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below - _flatten_dict(value, prefix=(prefix + [key]), result=result) + _flatten_dict( + value, + prefix=(prefix + [key]), + result=result, + msc3783_escape_event_match_key=msc3783_escape_event_match_key, + ) # `room_version` should only ever be set when looking at the top level of an event if ( diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index da33423871..516b65cc3c 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -48,6 +48,14 @@ class FlattenDictTestCase(unittest.TestCase): input = {"foo": {"bar": "abc"}} self.assertEqual({"foo.bar": "abc"}, _flatten_dict(input)) + # If a field has a dot in it, escape it. + input = {"m.foo": {"b\\ar": "abc"}} + self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input)) + self.assertEqual( + {"m\\.foo.b\\\\ar": "abc"}, + _flatten_dict(input, msc3783_escape_event_match_key=True), + ) + def test_non_string(self) -> None: """Non-string items are dropped.""" input: Dict[str, Any] = { -- cgit 1.5.1 From 55e4d27b36fd69a3cf3eceecbd42706579ef2dc7 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 8 Feb 2023 11:25:11 -0800 Subject: Limit concurrent event creation for a room to avoid state resolution when sending bursts of events to a local room (#14977) --- changelog.d/14977.misc | 1 + synapse/handlers/message.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/14977.misc (limited to 'synapse') diff --git a/changelog.d/14977.misc b/changelog.d/14977.misc new file mode 100644 index 0000000000..4d551c52b7 --- /dev/null +++ b/changelog.d/14977.misc @@ -0,0 +1 @@ +Limit concurrent event creation for a room to avoid state resolution when sending bursts of events to a local room. \ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e688e00575..5f6da2943f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -499,9 +499,9 @@ class EventCreationHandler: self.request_ratelimiter = hs.get_request_ratelimiter() - # We arbitrarily limit concurrent event creation for a room to 5. - # This is to stop us from diverging history *too* much. - self.limiter = Linearizer(max_count=5, name="room_event_creation_limit") + # We limit concurrent event creation for a room to 1. This prevents state resolution + # from occurring when sending bursts of events to a local room + self.limiter = Linearizer(max_count=1, name="room_event_creation_limit") self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() -- cgit 1.5.1 From 733531ee3e695da92f10e01b24f62ee35e09e4cd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 09:49:04 -0500 Subject: Add final type hint to synapse.server. (#15035) --- changelog.d/15035.misc | 1 + mypy.ini | 3 --- synapse/handlers/room.py | 2 +- synapse/server.py | 12 +++++------- synapse/storage/_base.py | 2 ++ synapse/storage/database.py | 1 + synapse/storage/databases/main/events.py | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 changelog.d/15035.misc (limited to 'synapse') diff --git a/changelog.d/15035.misc b/changelog.d/15035.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15035.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/mypy.ini b/mypy.ini index 3f144e61fb..57f27ba4f7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -53,9 +53,6 @@ warn_unused_ignores = False [mypy-synapse.util.caches.treecache] disallow_untyped_defs = False -[mypy-synapse.server] -disallow_untyped_defs = False - [mypy-synapse.storage.database] disallow_untyped_defs = False diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7ba7c4ff07..0e759b8a5d 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1076,7 +1076,7 @@ class RoomCreationHandler: state_map: MutableStateMap[str] = {} # current_state_group of last event created. Used for computing event context of # events to be batched - current_state_group = None + current_state_group: Optional[int] = None def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} diff --git a/synapse/server.py b/synapse/server.py index 9d6d268f49..efc6b5f895 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -21,7 +21,7 @@ import abc import functools import logging -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar, cast +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast from twisted.internet.interfaces import IOpenSSLContextFactory from twisted.internet.tcp import Port @@ -144,10 +144,10 @@ if TYPE_CHECKING: from synapse.handlers.saml import SamlHandler -T = TypeVar("T", bound=Callable[..., Any]) +T = TypeVar("T") -def cache_in_self(builder: T) -> T: +def cache_in_self(builder: Callable[["HomeServer"], T]) -> Callable[["HomeServer"], T]: """Wraps a function called e.g. `get_foo`, checking if `self.foo` exists and returning if so. If not, calls the given function and sets `self.foo` to it. @@ -166,7 +166,7 @@ def cache_in_self(builder: T) -> T: building = [False] @functools.wraps(builder) - def _get(self): + def _get(self: "HomeServer") -> T: try: return getattr(self, depname) except AttributeError: @@ -185,9 +185,7 @@ def cache_in_self(builder: T) -> T: return dep - # We cast here as we need to tell mypy that `_get` has the same signature as - # `builder`. - return cast(T, _get) + return _get class HomeServer(metaclass=abc.ABCMeta): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 41d9111019..481fec72fe 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -37,6 +37,8 @@ class SQLBaseStore(metaclass=ABCMeta): per data store (and not one per physical database). """ + db_pool: DatabasePool + def __init__( self, database: DatabasePool, diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e20c5c5302..feaa6cdd07 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -499,6 +499,7 @@ class DatabasePool: """ _TXN_ID = 0 + engine: BaseDatabaseEngine def __init__( self, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1536937b67..cb66376fb4 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -306,7 +306,7 @@ class PersistEventsStore: # The set of event_ids to return. This includes all soft-failed events # and their prev events. - existing_prevs = set() + existing_prevs: Set[str] = set() def _get_prevs_before_rejected_txn( txn: LoggingTransaction, batch: Collection[str] -- cgit 1.5.1 From cd2484dc2e943e40242337dae61f5170638116a2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 9 Feb 2023 15:28:26 +0000 Subject: Bump schema version (#15036) * Bump schema version This should have been included in f10caa73eee0caa91cf373966104d1ededae2aee (and #14979). * Changelog --- changelog.d/15036.misc | 1 + synapse/storage/schema/__init__.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 changelog.d/15036.misc (limited to 'synapse') diff --git a/changelog.d/15036.misc b/changelog.d/15036.misc new file mode 100644 index 0000000000..b0adc9c9d1 --- /dev/null +++ b/changelog.d/15036.misc @@ -0,0 +1 @@ +Prepare for future database schema changes. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 19dbf2da7f..d3103a6c7a 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 73 # remember to update the list below when updating +SCHEMA_VERSION = 74 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -78,7 +78,7 @@ Changes in SCHEMA_VERSION = 72: - Unused column application_services_state.last_txn is dropped - Cache invalidation stream id sequence now begins at 2 to match code expectation. -Changes in SCHEMA_VERSION = 73; +Changes in SCHEMA_VERSION = 73: - thread_id column is added to event_push_actions, event_push_actions_staging event_push_summary, receipts_linearized, and receipts_graph. - Add table `event_failed_pull_attempts` to keep track when we fail to pull @@ -86,6 +86,11 @@ Changes in SCHEMA_VERSION = 73; - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`, `batch_events`) to make it easy to delete all associated rows when purging a room. - `inserted_ts` column is added to `event_push_actions_staging` table. + +Changes in SCHEMA_VERSION = 74: + - A query on `event_stream_ordering` column has now been disambiguated (i.e. the + codebase can handle the `current_state_events`, `local_current_memberships` and + `room_memberships` tables having an `event_stream_ordering` column). """ -- cgit 1.5.1 From 8a6e0434889ea94893119775b6f56904cbc575c2 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 10:56:02 -0500 Subject: Avoid mutating cached room aliases. (#15038) This might cause incorrect data in other callers which are not expecting the canonical alias to be added into the response. --- changelog.d/15038.bugfix | 1 + synapse/handlers/directory.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15038.bugfix (limited to 'synapse') diff --git a/changelog.d/15038.bugfix b/changelog.d/15038.bugfix new file mode 100644 index 0000000000..4695a09756 --- /dev/null +++ b/changelog.d/15038.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the room aliases returned could be corrupted. diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 2ea52257cb..d31b0fbb17 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -485,7 +485,8 @@ class DirectoryHandler: ) ) if canonical_alias: - room_aliases.append(canonical_alias) + # Ensure we do not mutate room_aliases. + room_aliases = room_aliases + [canonical_alias] if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases -- cgit 1.5.1 From d22c1c862c8259465a8e95c41eb1f00d0367a640 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 13:04:24 -0500 Subject: Respond correctly to unknown methods on known endpoints (#14605) Respond with a 405 error if a request is received on a known endpoint, but to an unknown method, per MSC3743. --- changelog.d/14605.bugfix | 1 + docs/admin_api/media_admin_api.md | 10 +++++++- docs/upgrade.md | 10 ++++++++ synapse/http/server.py | 40 ++++++++++++-------------------- synapse/rest/admin/media.py | 18 +++++++++++---- synapse/rest/client/room_keys.py | 48 ++++++++++++++++++++++++++------------- synapse/rest/client/tags.py | 4 +++- tests/rest/admin/test_media.py | 9 +++++--- 8 files changed, 89 insertions(+), 51 deletions(-) create mode 100644 changelog.d/14605.bugfix (limited to 'synapse') diff --git a/changelog.d/14605.bugfix b/changelog.d/14605.bugfix new file mode 100644 index 0000000000..cb95a87d92 --- /dev/null +++ b/changelog.d/14605.bugfix @@ -0,0 +1 @@ +Return spec-compliant JSON errors when unknown endpoints are requested. diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 7f8c8e22c1..30833f3109 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -235,6 +235,14 @@ The following fields are returned in the JSON response body: Request: +``` +POST /_synapse/admin/v1/media/delete?before_ts= + +{} +``` + +*Deprecated in Synapse v1.78.0:* This API is available at the deprecated endpoint: + ``` POST /_synapse/admin/v1/media//delete?before_ts= @@ -243,7 +251,7 @@ POST /_synapse/admin/v1/media//delete?before_ts= URL Parameters -* `server_name`: string - The name of your local server (e.g `matrix.org`). +* `server_name`: string - The name of your local server (e.g `matrix.org`). *Deprecated in Synapse v1.78.0.* * `before_ts`: string representing a positive integer - Unix timestamp in milliseconds. Files that were last used before this timestamp will be deleted. It is the timestamp of last access, not the timestamp when the file was created. diff --git a/docs/upgrade.md b/docs/upgrade.md index bc143444be..15167b8c58 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,15 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.78.0 + +## Deprecate the `/_synapse/admin/v1/media//delete` admin API + +Synapse 1.78.0 replaces the `/_synapse/admin/v1/media//delete` +admin API with an identical endpoint at `/_synapse/admin/v1/media/delete`. Please +update your tooling to use the new endpoint. The deprecated version will be removed +in a future release. + # Upgrading to v1.76.0 ## Faster joins are enabled by default @@ -137,6 +146,7 @@ and then do `pip install matrix-synapse[user-search]` for a PyPI install. Docker images and Debian packages need nothing specific as they already include or specify ICU as an explicit dependency. + # Upgrading to v1.73.0 ## Legacy Prometheus metric names have now been removed diff --git a/synapse/http/server.py b/synapse/http/server.py index 2563858f3c..9314454af1 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -30,7 +30,6 @@ from typing import ( Iterable, Iterator, List, - NoReturn, Optional, Pattern, Tuple, @@ -340,7 +339,8 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): return callback_return - return _unrecognised_request_handler(request) + # A request with an unknown method (for a known endpoint) was received. + raise UnrecognizedRequestError(code=405) @abc.abstractmethod def _send_response( @@ -396,7 +396,6 @@ class DirectServeJsonResource(_AsyncResource): @attr.s(slots=True, frozen=True, auto_attribs=True) class _PathEntry: - pattern: Pattern callback: ServletCallback servlet_classname: str @@ -425,13 +424,14 @@ class JsonResource(DirectServeJsonResource): ): super().__init__(canonical_json, extract_context) self.clock = hs.get_clock() - self.path_regexs: Dict[bytes, List[_PathEntry]] = {} + # Map of path regex -> method -> callback. + self._routes: Dict[Pattern[str], Dict[bytes, _PathEntry]] = {} self.hs = hs def register_paths( self, method: str, - path_patterns: Iterable[Pattern], + path_patterns: Iterable[Pattern[str]], callback: ServletCallback, servlet_classname: str, ) -> None: @@ -455,8 +455,8 @@ class JsonResource(DirectServeJsonResource): for path_pattern in path_patterns: logger.debug("Registering for %s %s", method, path_pattern.pattern) - self.path_regexs.setdefault(method_bytes, []).append( - _PathEntry(path_pattern, callback, servlet_classname) + self._routes.setdefault(path_pattern, {})[method_bytes] = _PathEntry( + callback, servlet_classname ) def _get_handler_for_request( @@ -478,14 +478,17 @@ class JsonResource(DirectServeJsonResource): # Loop through all the registered callbacks to check if the method # and path regex match - for path_entry in self.path_regexs.get(request_method, []): - m = path_entry.pattern.match(request_path) + for path_pattern, methods in self._routes.items(): + m = path_pattern.match(request_path) if m: - # We found a match! + # We found a matching path! + path_entry = methods.get(request_method) + if not path_entry: + raise UnrecognizedRequestError(code=405) return path_entry.callback, path_entry.servlet_classname, m.groupdict() - # Huh. No one wanted to handle that? Fiiiiiine. Send 400. - return _unrecognised_request_handler, "unrecognised_request_handler", {} + # Huh. No one wanted to handle that? Fiiiiiine. + raise UnrecognizedRequestError(code=404) async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]: callback, servlet_classname, group_dict = self._get_handler_for_request(request) @@ -567,19 +570,6 @@ class StaticResource(File): return super().render_GET(request) -def _unrecognised_request_handler(request: Request) -> NoReturn: - """Request handler for unrecognised requests - - This is a request handler suitable for return from - _get_handler_for_request. It actually just raises an - UnrecognizedRequestError. - - Args: - request: Unused, but passed in to match the signature of ServletCallback. - """ - raise UnrecognizedRequestError(code=404) - - class UnrecognizedRequestResource(resource.Resource): """ Similar to twisted.web.resource.NoResource, but returns a JSON 404 with an diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 0d072c42a7..c134ccfb3d 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -15,7 +15,7 @@ import logging from http import HTTPStatus -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Optional, Tuple from synapse.api.constants import Direction from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -285,7 +285,12 @@ class DeleteMediaByDateSize(RestServlet): timestamp and size. """ - PATTERNS = admin_patterns("/media/(?P[^/]*)/delete$") + PATTERNS = [ + *admin_patterns("/media/delete$"), + # This URL kept around for legacy reasons, it is undesirable since it + # overlaps with the DeleteMediaByID servlet. + *admin_patterns("/media/(?P[^/]*)/delete$"), + ] def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main @@ -294,7 +299,7 @@ class DeleteMediaByDateSize(RestServlet): self.media_repository = hs.get_media_repository() async def on_POST( - self, request: SynapseRequest, server_name: str + self, request: SynapseRequest, server_name: Optional[str] = None ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) @@ -322,7 +327,8 @@ class DeleteMediaByDateSize(RestServlet): errcode=Codes.INVALID_PARAM, ) - if self.server_name != server_name: + # This check is useless, we keep it for the legacy endpoint only. + if server_name is not None and self.server_name != server_name: raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media") logging.info( @@ -489,6 +495,8 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) ProtectMediaByID(hs).register(http_server) UnprotectMediaByID(hs).register(http_server) ListMediaInRoom(hs).register(http_server) - DeleteMediaByID(hs).register(http_server) + # XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as + # their URL routes overlap. DeleteMediaByDateSize(hs).register(http_server) + DeleteMediaByID(hs).register(http_server) UserMediaRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py index f7081f638e..4e7ffdb555 100644 --- a/synapse/rest/client/room_keys.py +++ b/synapse/rest/client/room_keys.py @@ -259,6 +259,32 @@ class RoomKeysNewVersionServlet(RestServlet): self.auth = hs.get_auth() self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + """ + Retrieve the version information about the most current backup version (if any) + + It takes out an exclusive lock on this user's room_key backups, to ensure + clients only upload to the current backup. + + Returns 404 if the given version does not exist. + + GET /room_keys/version HTTP/1.1 + { + "version": "12345", + "algorithm": "m.megolm_backup.v1", + "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K" + } + """ + requester = await self.auth.get_user_by_req(request, allow_guest=False) + user_id = requester.user.to_string() + + try: + info = await self.e2e_room_keys_handler.get_version_info(user_id) + except SynapseError as e: + if e.code == 404: + raise SynapseError(404, "No backup found", Codes.NOT_FOUND) + return 200, info + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """ Create a new backup version for this user's room_keys with the given @@ -301,7 +327,7 @@ class RoomKeysNewVersionServlet(RestServlet): class RoomKeysVersionServlet(RestServlet): - PATTERNS = client_patterns("/room_keys/version(/(?P[^/]+))?$") + PATTERNS = client_patterns("/room_keys/version/(?P[^/]+)$") def __init__(self, hs: "HomeServer"): super().__init__() @@ -309,12 +335,11 @@ class RoomKeysVersionServlet(RestServlet): self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() async def on_GET( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Retrieve the version information about a given version of the user's - room_keys backup. If the version part is missing, returns info about the - most current backup version (if any) + room_keys backup. It takes out an exclusive lock on this user's room_key backups, to ensure clients only upload to the current backup. @@ -339,20 +364,16 @@ class RoomKeysVersionServlet(RestServlet): return 200, info async def on_DELETE( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Delete the information about a given version of the user's - room_keys backup. If the version part is missing, deletes the most - current backup version (if any). Doesn't delete the actual room data. + room_keys backup. Doesn't delete the actual room data. DELETE /room_keys/version/12345 HTTP/1.1 HTTP/1.1 200 OK {} """ - if version is None: - raise SynapseError(400, "No version specified to delete", Codes.NOT_FOUND) - requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() @@ -360,7 +381,7 @@ class RoomKeysVersionServlet(RestServlet): return 200, {} async def on_PUT( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Update the information about a given version of the user's room_keys backup. @@ -386,11 +407,6 @@ class RoomKeysVersionServlet(RestServlet): user_id = requester.user.to_string() info = parse_json_object_from_request(request) - if version is None: - raise SynapseError( - 400, "No version specified to update", Codes.MISSING_PARAM - ) - await self.e2e_room_keys_handler.update_version(user_id, version, info) return 200, {} diff --git a/synapse/rest/client/tags.py b/synapse/rest/client/tags.py index ca638755c7..dde08417a4 100644 --- a/synapse/rest/client/tags.py +++ b/synapse/rest/client/tags.py @@ -34,7 +34,9 @@ class TagListServlet(RestServlet): GET /user/{user_id}/rooms/{room_id}/tags HTTP/1.1 """ - PATTERNS = client_patterns("/user/(?P[^/]*)/rooms/(?P[^/]*)/tags") + PATTERNS = client_patterns( + "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags$" + ) def __init__(self, hs: "HomeServer"): super().__init__() diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index aadb31ca83..db77a45ae3 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -213,7 +213,8 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): self.admin_user_tok = self.login("admin", "pass") self.filepaths = MediaFilePaths(hs.config.media.media_store_path) - self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name + self.url = "/_synapse/admin/v1/media/delete" + self.legacy_url = "/_synapse/admin/v1/media/%s/delete" % self.server_name # Move clock up to somewhat realistic time self.reactor.advance(1000000000) @@ -332,11 +333,13 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): channel.json_body["error"], ) - def test_delete_media_never_accessed(self) -> None: + @parameterized.expand([(True,), (False,)]) + def test_delete_media_never_accessed(self, use_legacy_url: bool) -> None: """ Tests that media deleted if it is older than `before_ts` and never accessed `last_access_ts` is `NULL` and `created_ts` < `before_ts` """ + url = self.legacy_url if use_legacy_url else self.url # upload and do not access server_and_media_id = self._create_media() @@ -351,7 +354,7 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): now_ms = self.clock.time_msec() channel = self.make_request( "POST", - self.url + "?before_ts=" + str(now_ms), + url + "?before_ts=" + str(now_ms), access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) -- cgit 1.5.1 From c1d2ce2901ab1c7cfaeebb4683af05a2ebf19fa6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 9 Feb 2023 19:57:01 +0000 Subject: Do not always start a db txn on Postgres (#14840) --- changelog.d/14840.misc | 1 + synapse/storage/prepare_database.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14840.misc (limited to 'synapse') diff --git a/changelog.d/14840.misc b/changelog.d/14840.misc new file mode 100644 index 0000000000..ff6084284a --- /dev/null +++ b/changelog.d/14840.misc @@ -0,0 +1 @@ +Prevent "WARNING: there is already a transaction in progress" lines appearing in PostgreSQL's logs on some occasions. \ No newline at end of file diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 3acdb39da7..6c335a9315 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -23,7 +23,7 @@ from typing_extensions import Counter as CounterType from synapse.config.homeserver import HomeServerConfig from synapse.storage.database import LoggingDatabaseConnection -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor @@ -108,9 +108,14 @@ def prepare_database( # so we start one before running anything. This ensures that any upgrades # are either applied completely, or not at all. # - # (psycopg2 automatically starts a transaction as soon as we run any statements - # at all, so this is redundant but harmless there.) - cur.execute("BEGIN TRANSACTION") + # psycopg2 does not automatically start transactions when in autocommit mode. + # While it is technically harmless to nest transactions in postgres, doing so + # results in a warning in Postgres' logs per query. And we'd rather like to + # avoid doing that. + if isinstance(database_engine, Sqlite3Engine) or ( + isinstance(database_engine, PostgresEngine) and db_conn.autocommit + ): + cur.execute("BEGIN TRANSACTION") logger.info("%r: Checking existing schema version", databases) version_info = _get_or_create_schema_state(cur, database_engine) -- cgit 1.5.1 From 03bccd542bcffe3ea12cd35108740a7d62dd38ab Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 9 Feb 2023 13:05:02 -0800 Subject: Add a class UnpersistedEventContext to allow for the batching up of storing state groups (#14675) * add class UnpersistedEventContext * modify create new client event to create unpersistedeventcontexts * persist event contexts after creation * fix tests to persist unpersisted event contexts * cleanup * misc lints + cleanup * changelog + fix comments * lints * fix batch insertion? * reduce redundant calculation * add unpersisted event classes * rework compute_event_context, split into function that returns unpersisted event context and then persists it * use calculate_context_info to create unpersisted event contexts * update typing * $%#^&* * black * fix comments and consolidate classes, use attr.s for class * requested changes * lint * requested changes * requested changes * refactor to be stupidly explicit * clearer renaming and flow * make partial state non-optional * update docstrings --------- Co-authored-by: Erik Johnston --- changelog.d/14675.misc | 1 + synapse/events/snapshot.py | 174 ++++++++++++++++++++++++++++++++- synapse/events/third_party_rules.py | 6 +- synapse/handlers/federation.py | 59 ++++++++---- synapse/handlers/federation_event.py | 6 +- synapse/handlers/message.py | 42 +++++--- synapse/state/__init__.py | 176 ++++++++++++++-------------------- tests/handlers/test_user_directory.py | 4 +- tests/rest/admin/test_user.py | 4 +- tests/storage/test_redaction.py | 24 +++-- tests/storage/test_state.py | 4 +- tests/test_utils/event_injection.py | 7 +- tests/test_visibility.py | 9 +- tests/utils.py | 5 +- 14 files changed, 359 insertions(+), 162 deletions(-) create mode 100644 changelog.d/14675.misc (limited to 'synapse') diff --git a/changelog.d/14675.misc b/changelog.d/14675.misc new file mode 100644 index 0000000000..bc1ac1c82a --- /dev/null +++ b/changelog.d/14675.misc @@ -0,0 +1 @@ +Add a class UnpersistedEventContext to allow for the batching up of storing state groups. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 6eaef8b57a..e0d82ad81c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from abc import ABC, abstractmethod from typing import TYPE_CHECKING, List, Optional, Tuple import attr @@ -26,8 +27,51 @@ if TYPE_CHECKING: from synapse.types.state import StateFilter +class UnpersistedEventContextBase(ABC): + """ + This is a base class for EventContext and UnpersistedEventContext, objects which + hold information relevant to storing an associated event. Note that an + UnpersistedEventContexts must be converted into an EventContext before it is + suitable to send to the db with its associated event. + + Attributes: + _storage: storage controllers for interfacing with the database + app_service: If the associated event is being sent by a (local) application service, that + app service. + """ + + def __init__(self, storage_controller: "StorageControllers"): + self._storage: "StorageControllers" = storage_controller + self.app_service: Optional[ApplicationService] = None + + @abstractmethod + async def persist( + self, + event: EventBase, + ) -> "EventContext": + """ + A method to convert an UnpersistedEventContext to an EventContext, suitable for + sending to the database with the associated event. + """ + pass + + @abstractmethod + async def get_prev_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> StateMap[str]: + """ + Gets the room state at the event (ie not including the event if the event is a + state event). + + Args: + state_filter: specifies the type of state event to fetch from DB, example: + EventTypes.JoinRules + """ + pass + + @attr.s(slots=True, auto_attribs=True) -class EventContext: +class EventContext(UnpersistedEventContextBase): """ Holds information relevant to persisting an event @@ -77,9 +121,6 @@ class EventContext: delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group`` and ``state_group``. - app_service: If this event is being sent by a (local) application service, that - app service. - partial_state: if True, we may be storing this event with a temporary, incomplete state. """ @@ -122,6 +163,9 @@ class EventContext: """Return an EventContext instance suitable for persisting an outlier event""" return EventContext(storage=storage) + async def persist(self, event: EventBase) -> "EventContext": + return self + async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` @@ -254,6 +298,128 @@ class EventContext: ) +@attr.s(slots=True, auto_attribs=True) +class UnpersistedEventContext(UnpersistedEventContextBase): + """ + The event context holds information about the state groups for an event. It is important + to remember that an event technically has two state groups: the state group before the + event, and the state group after the event. If the event is not a state event, the state + group will not change (ie the state group before the event will be the same as the state + group after the event), but if it is a state event the state group before the event + will differ from the state group after the event. + This is a version of an EventContext before the new state group (if any) has been + computed and stored. It contains information about the state before the event (which + also may be the information after the event, if the event is not a state event). The + UnpersistedEventContext must be converted into an EventContext by calling the method + 'persist' on it before it is suitable to be sent to the DB for processing. + + state_group_after_event: + The state group after the event. This will always be None until it is persisted. + If the event is not a state event, this will be the same as + state_group_before_event. + + state_group_before_event: + The ID of the state group representing the state of the room before this event. + + state_delta_due_to_event: + If the event is a state event, then this is the delta of the state between + `state_group` and `state_group_before_event` + + prev_group_for_state_group_before_event: + If it is known, ``state_group_before_event``'s previous state group. + + delta_ids_to_state_group_before_event: + If ``prev_group_for_state_group_before_event`` is not None, the state delta + between ``prev_group_for_state_group_before_event`` and ``state_group_before_event``. + + partial_state: + Whether the event has partial state. + + state_map_before_event: + A map of the state before the event, i.e. the state at `state_group_before_event` + """ + + _storage: "StorageControllers" + state_group_before_event: Optional[int] + state_group_after_event: Optional[int] + state_delta_due_to_event: Optional[dict] + prev_group_for_state_group_before_event: Optional[int] + delta_ids_to_state_group_before_event: Optional[StateMap[str]] + partial_state: bool + state_map_before_event: Optional[StateMap[str]] = None + + async def get_prev_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> StateMap[str]: + """ + Gets the room state map, excluding this event. + + Args: + state_filter: specifies the type of state event to fetch from DB + + Returns: + Maps a (type, state_key) to the event ID of the state event matching + this tuple. + """ + if self.state_map_before_event: + return self.state_map_before_event + + assert self.state_group_before_event is not None + return await self._storage.state.get_state_ids_for_group( + self.state_group_before_event, state_filter + ) + + async def persist(self, event: EventBase) -> EventContext: + """ + Creates a full `EventContext` for the event, persisting any referenced state that + has not yet been persisted. + + Args: + event: event that the EventContext is associated with. + + Returns: An EventContext suitable for sending to the database with the event + for persisting + """ + assert self.partial_state is not None + + # If we have a full set of state for before the event but don't have a state + # group for that state, we need to get one + if self.state_group_before_event is None: + assert self.state_map_before_event + state_group_before_event = await self._storage.state.store_state_group( + event.event_id, + event.room_id, + prev_group=self.prev_group_for_state_group_before_event, + delta_ids=self.delta_ids_to_state_group_before_event, + current_state_ids=self.state_map_before_event, + ) + self.state_group_before_event = state_group_before_event + + # if the event isn't a state event the state group doesn't change + if not self.state_delta_due_to_event: + state_group_after_event = self.state_group_before_event + + # otherwise if it is a state event we need to get a state group for it + else: + state_group_after_event = await self._storage.state.store_state_group( + event.event_id, + event.room_id, + prev_group=self.state_group_before_event, + delta_ids=self.state_delta_due_to_event, + current_state_ids=None, + ) + + return EventContext.with_state( + storage=self._storage, + state_group=state_group_after_event, + state_group_before_event=self.state_group_before_event, + state_delta_due_to_event=self.state_delta_due_to_event, + partial_state=self.partial_state, + prev_group=self.state_group_before_event, + delta_ids=self.state_delta_due_to_event, + ) + + def _encode_state_dict( state_dict: Optional[StateMap[str]], ) -> Optional[List[Tuple[str, str, str]]]: diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 72ab696898..97c61cc258 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -18,7 +18,7 @@ from twisted.internet.defer import CancelledError from synapse.api.errors import ModuleFailedException, SynapseError from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import UnpersistedEventContextBase from synapse.storage.roommember import ProfileInfo from synapse.types import Requester, StateMap from synapse.util.async_helpers import delay_cancellation, maybe_awaitable @@ -231,7 +231,9 @@ class ThirdPartyEventRules: self._on_threepid_bind_callbacks.append(on_threepid_bind) async def check_event_allowed( - self, event: EventBase, context: EventContext + self, + event: EventBase, + context: UnpersistedEventContextBase, ) -> Tuple[bool, Optional[dict]]: """Check if a provided event should be allowed in the given context. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7f64130e0a..43ed4a3dd1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -56,7 +56,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.crypto.event_signing import compute_event_signature from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict @@ -990,7 +990,10 @@ class FederationHandler: ) try: - event, context = await self.event_creation_handler.create_new_client_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event( builder=builder ) except SynapseError as e: @@ -998,7 +1001,9 @@ class FederationHandler: raise # Ensure the user can even join the room. - await self._federation_event_handler.check_join_restrictions(context, event) + await self._federation_event_handler.check_join_restrictions( + unpersisted_context, event + ) # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_join_request` @@ -1178,7 +1183,7 @@ class FederationHandler: }, ) - event, context = await self.event_creation_handler.create_new_client_event( + event, _ = await self.event_creation_handler.create_new_client_event( builder=builder ) @@ -1228,12 +1233,13 @@ class FederationHandler: }, ) - event, context = await self.event_creation_handler.create_new_client_event( - builder=builder - ) + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event(builder=builder) event_allowed, _ = await self.third_party_event_rules.check_event_allowed( - event, context + event, unpersisted_context ) if not event_allowed: logger.warning("Creation of knock %s forbidden by third-party rules", event) @@ -1406,15 +1412,20 @@ class FederationHandler: try: ( event, - context, + unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( builder=builder ) - event, context = await self.add_display_name_to_third_party_invite( - room_version_obj, event_dict, event, context + ( + event, + unpersisted_context, + ) = await self.add_display_name_to_third_party_invite( + room_version_obj, event_dict, event, unpersisted_context ) + context = await unpersisted_context.persist(event) + EventValidator().validate_new(event, self.config) # We need to tell the transaction queue to send this out, even @@ -1483,14 +1494,19 @@ class FederationHandler: try: ( event, - context, + unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( builder=builder ) - event, context = await self.add_display_name_to_third_party_invite( - room_version_obj, event_dict, event, context + ( + event, + unpersisted_context, + ) = await self.add_display_name_to_third_party_invite( + room_version_obj, event_dict, event, unpersisted_context ) + context = await unpersisted_context.persist(event) + try: validate_event_for_room_version(event) await self._event_auth_handler.check_auth_rules_from_context(event) @@ -1522,8 +1538,8 @@ class FederationHandler: room_version_obj: RoomVersion, event_dict: JsonDict, event: EventBase, - context: EventContext, - ) -> Tuple[EventBase, EventContext]: + context: UnpersistedEventContextBase, + ) -> Tuple[EventBase, UnpersistedEventContextBase]: key = ( EventTypes.ThirdPartyInvite, event.content["third_party_invite"]["signed"]["token"], @@ -1557,11 +1573,14 @@ class FederationHandler: room_version_obj, event_dict ) EventValidator().validate_builder(builder) - event, context = await self.event_creation_handler.create_new_client_event( - builder=builder - ) + + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event(builder=builder) + EventValidator().validate_new(event, self.config) - return event, context + return event, unpersisted_context async def _check_signature(self, event: EventBase, context: EventContext) -> None: """ diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index e037acbca2..3561f2f1de 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -58,7 +58,7 @@ from synapse.event_auth import ( validate_event_for_room_version, ) from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.federation.federation_client import InvalidResponseError, PulledPduInfo from synapse.logging.context import nested_logging_context from synapse.logging.opentracing import ( @@ -426,7 +426,9 @@ class FederationEventHandler: return event, context async def check_join_restrictions( - self, context: EventContext, event: EventBase + self, + context: UnpersistedEventContextBase, + event: EventBase, ) -> None: """Check that restrictions in restricted join rules are matched diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5f6da2943f..3e30f52e4d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -48,7 +48,7 @@ from synapse.api.urls import ConsentURIBuilder from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase, relation_from_event from synapse.events.builder import EventBuilder -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.events.utils import maybe_upsert_event_field from synapse.events.validator import EventValidator from synapse.handlers.directory import DirectoryHandler @@ -708,7 +708,7 @@ class EventCreationHandler: builder.internal_metadata.historical = historical - event, context = await self.create_new_client_event( + event, unpersisted_context = await self.create_new_client_event( builder=builder, requester=requester, allow_no_prev_events=allow_no_prev_events, @@ -721,6 +721,8 @@ class EventCreationHandler: current_state_group=current_state_group, ) + context = await unpersisted_context.persist(event) + # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this # behaviour. Another reason is that this code is also evaluated each time a new @@ -1083,13 +1085,14 @@ class EventCreationHandler: state_map: Optional[StateMap[str]] = None, for_batch: bool = False, current_state_group: Optional[int] = None, - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """Create a new event for a local client. If bool for_batch is true, will create an event using the prev_event_ids, and will create an event context for the event using the parameters state_map and current_state_group, thus these parameters must be provided in this case if for_batch is True. The subsequently created event and context are suitable for being batched up and bulk persisted to the database - with other similarly created events. + with other similarly created events. Note that this returns an UnpersistedEventContext, + which must be converted to an EventContext before it can be sent to the DB. Args: builder: @@ -1131,7 +1134,7 @@ class EventCreationHandler: batch persisting Returns: - Tuple of created event, context + Tuple of created event, UnpersistedEventContext """ # Strip down the state_event_ids to only what we need to auth the event. # For example, we don't need extra m.room.member that don't match event.sender @@ -1192,9 +1195,16 @@ class EventCreationHandler: event = await builder.build( prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth ) - context = await self.state.compute_event_context_for_batched( - event, state_map, current_state_group + + context: UnpersistedEventContextBase = ( + await self.state.calculate_context_info( + event, + state_ids_before_event=state_map, + partial_state=False, + state_group_before_event=current_state_group, + ) ) + else: event = await builder.build( prev_event_ids=prev_event_ids, @@ -1244,16 +1254,17 @@ class EventCreationHandler: state_map_for_event[(data.event_type, data.state_key)] = state_id - context = await self.state.compute_event_context( + # TODO(faster_joins): check how MSC2716 works and whether we can have + # partial state here + # https://github.com/matrix-org/synapse/issues/13003 + context = await self.state.calculate_context_info( event, state_ids_before_event=state_map_for_event, - # TODO(faster_joins): check how MSC2716 works and whether we can have - # partial state here - # https://github.com/matrix-org/synapse/issues/13003 partial_state=False, ) + else: - context = await self.state.compute_event_context(event) + context = await self.state.calculate_context_info(event) if requester: context.app_service = requester.app_service @@ -2082,9 +2093,9 @@ class EventCreationHandler: async def _rebuild_event_after_third_party_rules( self, third_party_result: dict, original_event: EventBase - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: # the third_party_event_rules want to replace the event. - # we do some basic checks, and then return the replacement event and context. + # we do some basic checks, and then return the replacement event. # Construct a new EventBuilder and validate it, which helps with the # rest of these checks. @@ -2138,5 +2149,6 @@ class EventCreationHandler: # we rebuild the event context, to be on the safe side. If nothing else, # delta_ids might need an update. - context = await self.state.compute_event_context(event) + context = await self.state.calculate_context_info(event) + return event, context diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index fdfb46ab82..e877e6f1a1 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -39,7 +39,11 @@ from prometheus_client import Counter, Histogram from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import ( + EventContext, + UnpersistedEventContext, + UnpersistedEventContextBase, +) from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 @@ -262,31 +266,31 @@ class StateHandler: state = await entry.get_state(self._state_storage_controller, StateFilter.all()) return await self.store.get_joined_hosts(room_id, state, entry) - async def compute_event_context( + async def calculate_context_info( self, event: EventBase, state_ids_before_event: Optional[StateMap[str]] = None, partial_state: Optional[bool] = None, - ) -> EventContext: - """Build an EventContext structure for a non-outlier event. - - (for an outlier, call EventContext.for_outlier directly) - - This works out what the current state should be for the event, and - generates a new state group if necessary. - - Args: - event: - state_ids_before_event: The event ids of the state before the event if - it can't be calculated from existing events. This is normally - only specified when receiving an event from federation where we - don't have the prev events, e.g. when backfilling. - partial_state: - `True` if `state_ids_before_event` is partial and omits non-critical - membership events. - `False` if `state_ids_before_event` is the full state. - `None` when `state_ids_before_event` is not provided. In this case, the - flag will be calculated based on `event`'s prev events. + state_group_before_event: Optional[int] = None, + ) -> UnpersistedEventContextBase: + """ + Calulates the contents of an unpersisted event context, other than the current + state group (which is either provided or calculated when the event context is persisted) + + state_ids_before_event: + The event ids of the full state before the event if + it can't be calculated from existing events. This is normally + only specified when receiving an event from federation where we + don't have the prev events, e.g. when backfilling or when the event + is being created for batch persisting. + partial_state: + `True` if `state_ids_before_event` is partial and omits non-critical + membership events. + `False` if `state_ids_before_event` is the full state. + `None` when `state_ids_before_event` is not provided. In this case, the + flag will be calculated based on `event`'s prev events. + state_group_before_event: + the current state group at the time of event, if known Returns: The event context. @@ -294,7 +298,6 @@ class StateHandler: RuntimeError if `state_ids_before_event` is not provided and one or more prev events are missing or outliers. """ - assert not event.internal_metadata.is_outlier() # @@ -306,17 +309,6 @@ class StateHandler: state_group_before_event_prev_group = None deltas_to_state_group_before_event = None - # .. though we need to get a state group for it. - state_group_before_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=None, - delta_ids=None, - current_state_ids=state_ids_before_event, - ) - ) - # the partial_state flag must be provided assert partial_state is not None else: @@ -345,6 +337,7 @@ class StateHandler: logger.debug("calling resolve_state_groups from compute_event_context") # we've already taken into account partial state, so no need to wait for # complete state here. + entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids(), @@ -383,18 +376,19 @@ class StateHandler: # if not event.is_state(): - return EventContext.with_state( + return UnpersistedEventContext( storage=self._storage_controllers, state_group_before_event=state_group_before_event, - state_group=state_group_before_event, + state_group_after_event=state_group_before_event, state_delta_due_to_event={}, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, + prev_group_for_state_group_before_event=state_group_before_event_prev_group, + delta_ids_to_state_group_before_event=deltas_to_state_group_before_event, partial_state=partial_state, + state_map_before_event=state_ids_before_event, ) # - # otherwise, we'll need to create a new state group for after the event + # otherwise, we'll need to set up creating a new state group for after the event # key = (event.type, event.state_key) @@ -412,88 +406,60 @@ class StateHandler: delta_ids = {key: event.event_id} - state_group_after_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event, - delta_ids=delta_ids, - current_state_ids=None, - ) - ) - - return EventContext.with_state( + return UnpersistedEventContext( storage=self._storage_controllers, - state_group=state_group_after_event, state_group_before_event=state_group_before_event, + state_group_after_event=None, state_delta_due_to_event=delta_ids, - prev_group=state_group_before_event, - delta_ids=delta_ids, + prev_group_for_state_group_before_event=state_group_before_event_prev_group, + delta_ids_to_state_group_before_event=deltas_to_state_group_before_event, partial_state=partial_state, + state_map_before_event=state_ids_before_event, ) - async def compute_event_context_for_batched( + async def compute_event_context( self, event: EventBase, - state_ids_before_event: StateMap[str], - current_state_group: int, + state_ids_before_event: Optional[StateMap[str]] = None, + partial_state: Optional[bool] = None, ) -> EventContext: - """ - Generate an event context for an event that has not yet been persisted to the - database. Intended for use with events that are created to be persisted in a batch. - Args: - event: the event the context is being computed for - state_ids_before_event: a state map consisting of the state ids of the events - created prior to this event. - current_state_group: the current state group before the event. - """ - state_group_before_event_prev_group = None - deltas_to_state_group_before_event = None - - state_group_before_event = current_state_group - - # if the event is not state, we are set - if not event.is_state(): - return EventContext.with_state( - storage=self._storage_controllers, - state_group_before_event=state_group_before_event, - state_group=state_group_before_event, - state_delta_due_to_event={}, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, - partial_state=False, - ) + """Build an EventContext structure for a non-outlier event. - # otherwise, we'll need to create a new state group for after the event - key = (event.type, event.state_key) + (for an outlier, call EventContext.for_outlier directly) - if state_ids_before_event is not None: - replaces = state_ids_before_event.get(key) + This works out what the current state should be for the event, and + generates a new state group if necessary. - if replaces and replaces != event.event_id: - event.unsigned["replaces_state"] = replaces + Args: + event: + state_ids_before_event: The event ids of the state before the event if + it can't be calculated from existing events. This is normally + only specified when receiving an event from federation where we + don't have the prev events, e.g. when backfilling. + partial_state: + `True` if `state_ids_before_event` is partial and omits non-critical + membership events. + `False` if `state_ids_before_event` is the full state. + `None` when `state_ids_before_event` is not provided. In this case, the + flag will be calculated based on `event`'s prev events. + entry: + A state cache entry for the resolved state across the prev events. We may + have already calculated this, so if it's available pass it in + Returns: + The event context. - delta_ids = {key: event.event_id} + Raises: + RuntimeError if `state_ids_before_event` is not provided and one or more + prev events are missing or outliers. + """ - state_group_after_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event, - delta_ids=delta_ids, - current_state_ids=None, - ) + unpersisted_context = await self.calculate_context_info( + event=event, + state_ids_before_event=state_ids_before_event, + partial_state=partial_state, ) - return EventContext.with_state( - storage=self._storage_controllers, - state_group=state_group_after_event, - state_group_before_event=state_group_before_event, - state_delta_due_to_event=delta_ids, - prev_group=state_group_before_event, - delta_ids=delta_ids, - partial_state=False, - ) + return await unpersisted_context.persist(event) @measure_func() async def resolve_state_groups_for_events( diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 75fc5a17a4..e9be5fb504 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -949,10 +949,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success( self.hs.get_storage_controllers().persistence.persist_event(event, context) ) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 5c1ced355f..b50406e129 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2934,10 +2934,12 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(storage_controllers.persistence.persist_event(event, context)) # Now get rooms diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index df4740f9d9..0100f7da14 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -74,10 +74,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -96,10 +98,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -119,10 +123,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -259,7 +265,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): def internal_metadata(self) -> _EventInternalMetadata: return self._base_builder.internal_metadata - event_1, context_1 = self.get_success( + event_1, unpersisted_context_1 = self.get_success( self.event_creation_handler.create_new_client_event( cast( EventBuilder, @@ -280,9 +286,11 @@ class RedactionTestCase(unittest.HomeserverTestCase): ) ) + context_1 = self.get_success(unpersisted_context_1.persist(event_1)) + self.get_success(self._persistence.persist_event(event_1, context_1)) - event_2, context_2 = self.get_success( + event_2, unpersisted_context_2 = self.get_success( self.event_creation_handler.create_new_client_event( cast( EventBuilder, @@ -302,6 +310,8 @@ class RedactionTestCase(unittest.HomeserverTestCase): ) ) ) + + context_2 = self.get_success(unpersisted_context_2.persist(event_2)) self.get_success(self._persistence.persist_event(event_2, context_2)) # fetch one of the redactions @@ -421,10 +431,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - redaction_event, context = self.get_success( + redaction_event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(redaction_event)) + self.get_success(self._persistence.persist_event(redaction_event, context)) # Now lets jump to the future where we have censored the redaction event diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index bad7f0bc60..f730b888f7 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -67,10 +67,12 @@ class StateStoreTestCase(HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + assert self.storage.persistence is not None self.get_success(self.storage.persistence.persist_event(event, context)) diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py index 1a50c2acf1..a6330ed840 100644 --- a/tests/test_utils/event_injection.py +++ b/tests/test_utils/event_injection.py @@ -92,8 +92,13 @@ async def create_event( builder = hs.get_event_builder_factory().for_room_version( KNOWN_ROOM_VERSIONS[room_version], kwargs ) - event, context = await hs.get_event_creation_handler().create_new_client_event( + ( + event, + unpersisted_context, + ) = await hs.get_event_creation_handler().create_new_client_event( builder, prev_event_ids=prev_event_ids ) + context = await unpersisted_context.persist(event) + return event, context diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 875e37988f..36d6b37aa4 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -175,9 +175,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) ) @@ -202,9 +203,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) @@ -226,9 +228,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) diff --git a/tests/utils.py b/tests/utils.py index d76bf9716a..15fabbc2d0 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -335,6 +335,9 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: }, ) - event, context = await event_creation_handler.create_new_client_event(builder) + event, unpersisted_context = await event_creation_handler.create_new_client_event( + builder + ) + context = await unpersisted_context.persist(event) await persistence_store.persist_event(event, context) -- cgit 1.5.1 From a5a799722db0c33dc61fb2c6c7282ff7e82eb2e9 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 9 Feb 2023 22:33:39 +0000 Subject: Tag federation request spans with the worker name (#15042) * Systematically include worker name as process info * Changelog * don't bother with inner setdefault --- changelog.d/15042.feature | 1 + synapse/api/auth.py | 7 ------- synapse/logging/opentracing.py | 10 +++++++++- 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 changelog.d/15042.feature (limited to 'synapse') diff --git a/changelog.d/15042.feature b/changelog.d/15042.feature new file mode 100644 index 0000000000..7a4de89f00 --- /dev/null +++ b/changelog.d/15042.feature @@ -0,0 +1 @@ +Tag opentracing spans for federation requests with the name of the worker serving the request. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 3d7f986ac7..66e869bc2d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -32,7 +32,6 @@ from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest from synapse.logging.opentracing import ( - SynapseTags, active_span, force_tracing, start_active_span, @@ -162,12 +161,6 @@ class Auth: parent_span.set_tag( "authenticated_entity", requester.authenticated_entity ) - # We tag the Synapse instance name so that it's an easy jumping - # off point into the logs. Can also be used to filter for an - # instance that is under load. - parent_span.set_tag( - SynapseTags.INSTANCE_NAME, self.hs.get_instance_name() - ) parent_span.set_tag("user_id", requester.user.to_string()) if requester.device_id is not None: parent_span.set_tag("device_id", requester.device_id) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 8ef9a0dda8..6c7cf1b294 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -466,8 +466,16 @@ def init_tracer(hs: "HomeServer") -> None: STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name() ) + jaeger_config = hs.config.tracing.jaeger_config + tags = jaeger_config.setdefault("tags", {}) + + # tag the Synapse instance name so that it's an easy jumping + # off point into the logs. Can also be used to filter for an + # instance that is under load. + tags[SynapseTags.INSTANCE_NAME] = hs.get_instance_name() + config = JaegerConfig( - config=hs.config.tracing.jaeger_config, + config=jaeger_config, service_name=f"{hs.config.server.server_name} {instance_name_by_type}", scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), -- cgit 1.5.1 From fd296b7343f2e557519f1ec81325ad836bcbdbf9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Feb 2023 10:52:35 +0100 Subject: Fix exception on start up about device lists (#15041) Fixes #15010. --- changelog.d/15041.misc | 1 + synapse/storage/databases/main/devices.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/15041.misc (limited to 'synapse') diff --git a/changelog.d/15041.misc b/changelog.d/15041.misc new file mode 100644 index 0000000000..d602b0043a --- /dev/null +++ b/changelog.d/15041.misc @@ -0,0 +1 @@ +Fix a rare exception in logs on start up. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index e8b6cc6b80..766c2052fb 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -100,6 +100,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ("device_lists_outbound_pokes", "stream_id"), ("device_lists_changes_in_room", "stream_id"), ("device_lists_remote_pending", "stream_id"), + ("device_lists_changes_converted_stream_position", "stream_id"), ], is_writer=hs.config.worker.worker_app is None, ) -- cgit 1.5.1 From a481fb9f98ad10e5e129bdc7664c59498a7332f6 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 08:09:47 -0500 Subject: Refactor get_user_devices_from_cache to avoid mutating cached values. (#15040) The previous version of the code could mutate a cached value, but only if the input requested all devices of a user *and* a specific device. To avoid this nonsensical situation we no longer fetch a specific device ID if all of a user's devices are returned. --- changelog.d/15040.misc | 1 + synapse/handlers/e2e_keys.py | 11 +++++++---- synapse/storage/databases/main/devices.py | 31 +++++++++++++++++-------------- 3 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 changelog.d/15040.misc (limited to 'synapse') diff --git a/changelog.d/15040.misc b/changelog.d/15040.misc new file mode 100644 index 0000000000..ca129b64af --- /dev/null +++ b/changelog.d/15040.misc @@ -0,0 +1 @@ +Avoid mutating a cached value in `get_user_devices_from_cache`. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d2188ca08f..43cbece21b 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -159,19 +159,22 @@ class E2eKeysHandler: # A map of destination -> user ID -> device IDs. remote_queries_not_in_cache: Dict[str, Dict[str, Iterable[str]]] = {} if remote_queries: - query_list: List[Tuple[str, Optional[str]]] = [] + user_ids = set() + user_and_device_ids: List[Tuple[str, str]] = [] for user_id, device_ids in remote_queries.items(): if device_ids: - query_list.extend( + user_and_device_ids.extend( (user_id, device_id) for device_id in device_ids ) else: - query_list.append((user_id, None)) + user_ids.add(user_id) ( user_ids_not_in_cache, remote_results, - ) = await self.store.get_user_devices_from_cache(query_list) + ) = await self.store.get_user_devices_from_cache( + user_ids, user_and_device_ids + ) # Check that the homeserver still shares a room with all cached users. # Note that this check may be slightly racy when a remote user leaves a diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 766c2052fb..85c1778a81 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -746,42 +746,45 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): @trace @cancellable async def get_user_devices_from_cache( - self, query_list: List[Tuple[str, Optional[str]]] + self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]] ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. Args: - query_list: List of (user_id, device_ids), if device_ids is - falsey then return all device ids for that user. + user_ids: users which should have all device IDs returned + user_and_device_ids: List of (user_id, device_ids) Returns: A tuple of (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is a set of user_ids and results_map is a mapping of user_id -> device_id -> device_info. """ - user_ids = {user_id for user_id, _ in query_list} - user_map = await self.get_device_list_last_stream_id_for_remotes(list(user_ids)) + unique_user_ids = user_ids | {user_id for user_id, _ in user_and_device_ids} + user_map = await self.get_device_list_last_stream_id_for_remotes( + list(unique_user_ids) + ) # We go and check if any of the users need to have their device lists # resynced. If they do then we remove them from the cached list. users_needing_resync = await self.get_user_ids_requiring_device_list_resync( - user_ids + unique_user_ids ) user_ids_in_cache = { user_id for user_id, stream_id in user_map.items() if stream_id } - users_needing_resync - user_ids_not_in_cache = user_ids - user_ids_in_cache + user_ids_not_in_cache = unique_user_ids - user_ids_in_cache + # First fetch all the users which all devices are to be returned. results: Dict[str, Dict[str, JsonDict]] = {} - for user_id, device_id in query_list: - if user_id not in user_ids_in_cache: - continue - - if device_id: + for user_id in user_ids: + if user_id in user_ids_in_cache: + results[user_id] = await self.get_cached_devices_for_user(user_id) + # Then fetch all device-specific requests, but skip users we've already + # fetched all devices for. + for user_id, device_id in user_and_device_ids: + if user_id in user_ids_in_cache and user_id not in user_ids: device = await self._get_cached_user_device(user_id, device_id) results.setdefault(user_id, {})[device_id] = device - else: - results[user_id] = await self.get_cached_devices_for_user(user_id) set_tag("in_cache", str(results)) set_tag("not_in_cache", str(user_ids_not_in_cache)) -- cgit 1.5.1 From b95407908dfde97e483952722b6fa7a533ff5093 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 10 Feb 2023 13:11:20 +0000 Subject: Avoid mutating cached values in `_generate_sync_entry_for_account_data` (#15047) --- changelog.d/15047.misc | 1 + synapse/handlers/sync.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/15047.misc (limited to 'synapse') diff --git a/changelog.d/15047.misc b/changelog.d/15047.misc new file mode 100644 index 0000000000..561dc874de --- /dev/null +++ b/changelog.d/15047.misc @@ -0,0 +1 @@ +Avoid mutating cached values in `_generate_sync_entry_for_account_data`. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3566537894..202b35eee6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1753,6 +1753,7 @@ class SyncHandler: ) if push_rules_changed: + global_account_data = dict(global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) @@ -1763,6 +1764,7 @@ class SyncHandler: account_data_by_room, ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) + global_account_data = dict(global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) -- cgit 1.5.1 From cf5233b783273efc84b991e7242fb4761ccc201a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 09:22:16 -0500 Subject: Avoid fetching unused account data in sync. (#14973) The per-room account data is no longer unconditionally fetched, even if all rooms will be filtered out. Global account data will not be fetched if it will all be filtered out. --- changelog.d/14973.misc | 1 + synapse/api/filtering.py | 30 +++++- synapse/handlers/account_data.py | 10 +- synapse/handlers/initial_sync.py | 5 +- synapse/handlers/room_member.py | 2 +- synapse/handlers/sync.py | 88 +++++++++-------- synapse/rest/admin/users.py | 3 +- synapse/storage/databases/main/account_data.py | 127 ++++++++++++++++++------- 8 files changed, 176 insertions(+), 90 deletions(-) create mode 100644 changelog.d/14973.misc (limited to 'synapse') diff --git a/changelog.d/14973.misc b/changelog.d/14973.misc new file mode 100644 index 0000000000..3657623602 --- /dev/null +++ b/changelog.d/14973.misc @@ -0,0 +1 @@ +Improve performance of `/sync` in a few situations. diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83c42fc25a..b9f432cc23 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -219,9 +219,13 @@ class FilterCollection: self._room_timeline_filter = Filter(hs, room_filter_json.get("timeline", {})) self._room_state_filter = Filter(hs, room_filter_json.get("state", {})) self._room_ephemeral_filter = Filter(hs, room_filter_json.get("ephemeral", {})) - self._room_account_data = Filter(hs, room_filter_json.get("account_data", {})) + self._room_account_data_filter = Filter( + hs, room_filter_json.get("account_data", {}) + ) self._presence_filter = Filter(hs, filter_json.get("presence", {})) - self._account_data = Filter(hs, filter_json.get("account_data", {})) + self._global_account_data_filter = Filter( + hs, filter_json.get("account_data", {}) + ) self.include_leave = filter_json.get("room", {}).get("include_leave", False) self.event_fields = filter_json.get("event_fields", []) @@ -256,8 +260,10 @@ class FilterCollection: ) -> List[UserPresenceState]: return await self._presence_filter.filter(presence_states) - async def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]: - return await self._account_data.filter(events) + async def filter_global_account_data( + self, events: Iterable[JsonDict] + ) -> List[JsonDict]: + return await self._global_account_data_filter.filter(events) async def filter_room_state(self, events: Iterable[EventBase]) -> List[EventBase]: return await self._room_state_filter.filter( @@ -279,7 +285,7 @@ class FilterCollection: async def filter_room_account_data( self, events: Iterable[JsonDict] ) -> List[JsonDict]: - return await self._room_account_data.filter( + return await self._room_account_data_filter.filter( await self._room_filter.filter(events) ) @@ -292,6 +298,13 @@ class FilterCollection: or self._presence_filter.filters_all_senders() ) + def blocks_all_global_account_data(self) -> bool: + """True if all global acount data will be filtered out.""" + return ( + self._global_account_data_filter.filters_all_types() + or self._global_account_data_filter.filters_all_senders() + ) + def blocks_all_room_ephemeral(self) -> bool: return ( self._room_ephemeral_filter.filters_all_types() @@ -299,6 +312,13 @@ class FilterCollection: or self._room_ephemeral_filter.filters_all_rooms() ) + def blocks_all_room_account_data(self) -> bool: + return ( + self._room_account_data_filter.filters_all_types() + or self._room_account_data_filter.filters_all_senders() + or self._room_account_data_filter.filters_all_rooms() + ) + def blocks_all_room_timeline(self) -> bool: return ( self._room_timeline_filter.filters_all_types() diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index 67e789eef7..797de46dbc 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -343,10 +343,12 @@ class AccountDataEventSource(EventSource[int, JsonDict]): } ) - ( - account_data, - room_account_data, - ) = await self.store.get_updated_account_data_for_user(user_id, last_stream_id) + account_data = await self.store.get_updated_global_account_data_for_user( + user_id, last_stream_id + ) + room_account_data = await self.store.get_updated_room_account_data_for_user( + user_id, last_stream_id + ) for account_data_type, content in account_data.items(): results.append({"type": account_data_type, "content": content}) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 191529bd8e..1a29abde98 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -154,9 +154,8 @@ class InitialSyncHandler: tags_by_room = await self.store.get_tags_for_user(user_id) - account_data, account_data_by_room = await self.store.get_account_data_for_user( - user_id - ) + account_data = await self.store.get_global_account_data_for_user(user_id) + account_data_by_room = await self.store.get_room_account_data_for_user(user_id) public_room_ids = await self.store.get_public_room_ids() diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index d236cc09b5..6e7141d2ef 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -484,7 +484,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): user_id: The user's ID. """ # Retrieve user account data for predecessor room - user_account_data, _ = await self.store.get_account_data_for_user(user_id) + user_account_data = await self.store.get_global_account_data_for_user(user_id) # Copy direct message state if applicable direct_rooms = user_account_data.get(AccountDataTypes.DIRECT, {}) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 202b35eee6..399685e5b7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1444,9 +1444,9 @@ class SyncHandler: logger.debug("Fetching account data") - account_data_by_room = await self._generate_sync_entry_for_account_data( - sync_result_builder - ) + # Global account data is included if it is not filtered out. + if not sync_config.filter_collection.blocks_all_global_account_data(): + await self._generate_sync_entry_for_account_data(sync_result_builder) # Presence data is included if the server has it enabled and not filtered out. include_presence_data = bool( @@ -1472,9 +1472,7 @@ class SyncHandler: ( newly_joined_rooms, newly_left_rooms, - ) = await self._generate_sync_entry_for_rooms( - sync_result_builder, account_data_by_room - ) + ) = await self._generate_sync_entry_for_rooms(sync_result_builder) # Work out which users have joined or left rooms we're in. We use this # to build the presence and device_list parts of the sync response in @@ -1717,35 +1715,29 @@ class SyncHandler: async def _generate_sync_entry_for_account_data( self, sync_result_builder: "SyncResultBuilder" - ) -> Dict[str, Dict[str, JsonDict]]: - """Generates the account data portion of the sync response. + ) -> None: + """Generates the global account data portion of the sync response. Account data (called "Client Config" in the spec) can be set either globally or for a specific room. Account data consists of a list of events which accumulate state, much like a room. - This function retrieves global and per-room account data. The former is written - to the given `sync_result_builder`. The latter is returned directly, to be - later written to the `sync_result_builder` on a room-by-room basis. + This function retrieves global account data and writes it to the given + `sync_result_builder`. See `_generate_sync_entry_for_rooms` for handling + of per-room account data. Args: sync_result_builder - - Returns: - A dictionary whose keys (room ids) map to the per room account data for that - room. """ sync_config = sync_result_builder.sync_config user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token if since_token and not sync_result_builder.full_state: - # TODO Do not fetch room account data if it will be unused. - ( - global_account_data, - account_data_by_room, - ) = await self.store.get_updated_account_data_for_user( - user_id, since_token.account_data_key + global_account_data = ( + await self.store.get_updated_global_account_data_for_user( + user_id, since_token.account_data_key + ) ) push_rules_changed = await self.store.have_push_rules_changed_for_user( @@ -1758,28 +1750,26 @@ class SyncHandler: sync_config.user ) else: - # TODO Do not fetch room account data if it will be unused. - ( - global_account_data, - account_data_by_room, - ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) + all_global_account_data = await self.store.get_global_account_data_for_user( + user_id + ) - global_account_data = dict(global_account_data) + global_account_data = dict(all_global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) - account_data_for_user = await sync_config.filter_collection.filter_account_data( - [ - {"type": account_data_type, "content": content} - for account_data_type, content in global_account_data.items() - ] + account_data_for_user = ( + await sync_config.filter_collection.filter_global_account_data( + [ + {"type": account_data_type, "content": content} + for account_data_type, content in global_account_data.items() + ] + ) ) sync_result_builder.account_data = account_data_for_user - return account_data_by_room - async def _generate_sync_entry_for_presence( self, sync_result_builder: "SyncResultBuilder", @@ -1839,9 +1829,7 @@ class SyncHandler: sync_result_builder.presence = presence async def _generate_sync_entry_for_rooms( - self, - sync_result_builder: "SyncResultBuilder", - account_data_by_room: Dict[str, Dict[str, JsonDict]], + self, sync_result_builder: "SyncResultBuilder" ) -> Tuple[AbstractSet[str], AbstractSet[str]]: """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. @@ -1852,7 +1840,6 @@ class SyncHandler: Args: sync_result_builder - account_data_by_room: Dictionary of per room account data Returns: Returns a 2-tuple describing rooms the user has joined or left. @@ -1865,9 +1852,30 @@ class SyncHandler: since_token = sync_result_builder.since_token user_id = sync_result_builder.sync_config.user.to_string() + blocks_all_rooms = ( + sync_result_builder.sync_config.filter_collection.blocks_all_rooms() + ) + + # 0. Start by fetching room account data (if required). + if ( + blocks_all_rooms + or sync_result_builder.sync_config.filter_collection.blocks_all_room_account_data() + ): + account_data_by_room: Mapping[str, Mapping[str, JsonDict]] = {} + elif since_token and not sync_result_builder.full_state: + account_data_by_room = ( + await self.store.get_updated_room_account_data_for_user( + user_id, since_token.account_data_key + ) + ) + else: + account_data_by_room = await self.store.get_room_account_data_for_user( + user_id + ) + # 1. Start by fetching all ephemeral events in rooms we've joined (if required). block_all_room_ephemeral = ( - sync_result_builder.sync_config.filter_collection.blocks_all_rooms() + blocks_all_rooms or sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral() ) if block_all_room_ephemeral: @@ -2294,7 +2302,7 @@ class SyncHandler: room_builder: "RoomSyncResultBuilder", ephemeral: List[JsonDict], tags: Optional[Dict[str, Dict[str, Any]]], - account_data: Dict[str, JsonDict], + account_data: Mapping[str, JsonDict], always_include: bool = False, ) -> None: """Populates the `joined` and `archived` section of `sync_result_builder` diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index b9dca8ef3a..0c0bf540b9 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -1192,7 +1192,8 @@ class AccountDataRestServlet(RestServlet): if not await self._store.get_user_by_id(user_id): raise NotFoundError("User not found") - global_data, by_room_data = await self._store.get_account_data_for_user(user_id) + global_data = await self._store.get_global_account_data_for_user(user_id) + by_room_data = await self._store.get_room_account_data_for_user(user_id) return HTTPStatus.OK, { "account_data": { "global": global_data, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 8a359d7eb8..2d6f02c14f 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -21,6 +21,7 @@ from typing import ( FrozenSet, Iterable, List, + Mapping, Optional, Tuple, cast, @@ -122,25 +123,25 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return self._account_data_id_gen.get_current_token() @cached() - async def get_account_data_for_user( + async def get_global_account_data_for_user( self, user_id: str - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: + ) -> Mapping[str, JsonDict]: """ - Get all the client account_data for a user. + Get all the global client account_data for a user. If experimental MSC3391 support is enabled, any entries with an empty content body are excluded; as this means they have been deleted. Args: user_id: The user to get the account_data for. + Returns: - A 2-tuple of a dict of global account_data and a dict mapping from - room_id string to per room account_data dicts. + The global account_data. """ - def get_account_data_for_user_txn( + def get_global_account_data_for_user( txn: LoggingTransaction, - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: + ) -> Dict[str, JsonDict]: # The 'content != '{}' condition below prevents us from using # `simple_select_list_txn` here, as it doesn't support conditions # other than 'equals'. @@ -158,10 +159,34 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn.execute(sql, (user_id,)) rows = self.db_pool.cursor_to_dict(txn) - global_account_data = { + return { row["account_data_type"]: db_to_json(row["content"]) for row in rows } + return await self.db_pool.runInteraction( + "get_global_account_data_for_user", get_global_account_data_for_user + ) + + @cached() + async def get_room_account_data_for_user( + self, user_id: str + ) -> Mapping[str, Mapping[str, JsonDict]]: + """ + Get all of the per-room client account_data for a user. + + If experimental MSC3391 support is enabled, any entries with an empty + content body are excluded; as this means they have been deleted. + + Args: + user_id: The user to get the account_data for. + + Returns: + A dict mapping from room_id string to per-room account_data dicts. + """ + + def get_room_account_data_for_user_txn( + txn: LoggingTransaction, + ) -> Dict[str, Dict[str, JsonDict]]: # The 'content != '{}' condition below prevents us from using # `simple_select_list_txn` here, as it doesn't support conditions # other than 'equals'. @@ -185,10 +210,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) room_data[row["account_data_type"]] = db_to_json(row["content"]) - return global_account_data, by_room + return by_room return await self.db_pool.runInteraction( - "get_account_data_for_user", get_account_data_for_user_txn + "get_room_account_data_for_user_txn", get_room_account_data_for_user_txn ) @cached(num_args=2, max_entries=5000, tree=True) @@ -342,36 +367,61 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) "get_updated_room_account_data", get_updated_room_account_data_txn ) - async def get_updated_account_data_for_user( + async def get_updated_global_account_data_for_user( self, user_id: str, stream_id: int - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: - """Get all the client account_data for a that's changed for a user + ) -> Dict[str, JsonDict]: + """Get all the global account_data that's changed for a user. Args: user_id: The user to get the account_data for. stream_id: The point in the stream since which to get updates + Returns: - A deferred pair of a dict of global account_data and a dict - mapping from room_id string to per room account_data dicts. + A dict of global account_data. """ - def get_updated_account_data_for_user_txn( + def get_updated_global_account_data_for_user( txn: LoggingTransaction, - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: - sql = ( - "SELECT account_data_type, content FROM account_data" - " WHERE user_id = ? AND stream_id > ?" - ) - + ) -> Dict[str, JsonDict]: + sql = """ + SELECT account_data_type, content FROM account_data + WHERE user_id = ? AND stream_id > ? + """ txn.execute(sql, (user_id, stream_id)) - global_account_data = {row[0]: db_to_json(row[1]) for row in txn} + return {row[0]: db_to_json(row[1]) for row in txn} - sql = ( - "SELECT room_id, account_data_type, content FROM room_account_data" - " WHERE user_id = ? AND stream_id > ?" - ) + changed = self._account_data_stream_cache.has_entity_changed( + user_id, int(stream_id) + ) + if not changed: + return {} + + return await self.db_pool.runInteraction( + "get_updated_global_account_data_for_user", + get_updated_global_account_data_for_user, + ) + + async def get_updated_room_account_data_for_user( + self, user_id: str, stream_id: int + ) -> Dict[str, Dict[str, JsonDict]]: + """Get all the room account_data that's changed for a user. + Args: + user_id: The user to get the account_data for. + stream_id: The point in the stream since which to get updates + + Returns: + A dict mapping from room_id string to per room account_data dicts. + """ + + def get_updated_room_account_data_for_user_txn( + txn: LoggingTransaction, + ) -> Dict[str, Dict[str, JsonDict]]: + sql = """ + SELECT room_id, account_data_type, content FROM room_account_data + WHERE user_id = ? AND stream_id > ? + """ txn.execute(sql, (user_id, stream_id)) account_data_by_room: Dict[str, Dict[str, JsonDict]] = {} @@ -379,16 +429,17 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) room_account_data = account_data_by_room.setdefault(row[0], {}) room_account_data[row[1]] = db_to_json(row[2]) - return global_account_data, account_data_by_room + return account_data_by_room changed = self._account_data_stream_cache.has_entity_changed( user_id, int(stream_id) ) if not changed: - return {}, {} + return {} return await self.db_pool.runInteraction( - "get_updated_account_data_for_user", get_updated_account_data_for_user_txn + "get_updated_room_account_data_for_user", + get_updated_room_account_data_for_user_txn, ) @cached(max_entries=5000, iterable=True) @@ -444,7 +495,8 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) self.get_global_account_data_by_type_for_user.invalidate( (row.user_id, row.data_type) ) - self.get_account_data_for_user.invalidate((row.user_id,)) + self.get_global_account_data_for_user.invalidate((row.user_id,)) + self.get_room_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id)) self.get_account_data_for_room_and_type.invalidate( (row.user_id, row.room_id, row.data_type) @@ -492,7 +544,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_room_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id)) self.get_account_data_for_room_and_type.prefill( (user_id, room_id, account_data_type), content @@ -558,7 +610,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return None self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_room_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id)) self.get_account_data_for_room_and_type.prefill( (user_id, room_id, account_data_type), {} @@ -593,7 +645,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_global_account_data_for_user.invalidate((user_id,)) self.get_global_account_data_by_type_for_user.invalidate( (user_id, account_data_type) ) @@ -761,7 +813,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return None self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_global_account_data_for_user.invalidate((user_id,)) self.get_global_account_data_by_type_for_user.prefill( (user_id, account_data_type), {} ) @@ -822,7 +874,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn, self.get_account_data_for_room_and_type, (user_id,) ) self._invalidate_cache_and_stream( - txn, self.get_account_data_for_user, (user_id,) + txn, self.get_global_account_data_for_user, (user_id,) + ) + self._invalidate_cache_and_stream( + txn, self.get_room_account_data_for_user, (user_id,) ) self._invalidate_cache_and_stream( txn, self.get_global_account_data_by_type_for_user, (user_id,) -- cgit 1.5.1 From 14be78d492fc31e743e9e5855ddb8b4c9520985a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 12:37:07 -0500 Subject: Support for MSC3758: exact_event_match push condition (#14964) This specifies to search for an exact value match, instead of string globbing. It only works across non-compound JSON values (null, boolean, integer, and strings). --- changelog.d/14964.feature | 1 + rust/benches/evaluator.rs | 65 +++++++++++--- rust/src/push/evaluator.rs | 69 +++++++++++---- rust/src/push/mod.rs | 83 +++++++++++++++++ stubs/synapse/synapse_rust/push.pyi | 7 +- synapse/config/experimental.py | 5 ++ synapse/push/bulk_push_rule_evaluator.py | 18 ++-- synapse/types/__init__.py | 2 + tests/push/test_push_rule_evaluator.py | 147 ++++++++++++++++++++++++++++++- 9 files changed, 356 insertions(+), 41 deletions(-) create mode 100644 changelog.d/14964.feature (limited to 'synapse') diff --git a/changelog.d/14964.feature b/changelog.d/14964.feature new file mode 100644 index 0000000000..13c0bc193b --- /dev/null +++ b/changelog.d/14964.feature @@ -0,0 +1 @@ +Implement the experimental `exact_event_match` push rule condition from [MSC3758](https://github.com/matrix-org/matrix-spec-proposals/pull/3758). diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 35f7a50bce..229553ebf8 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -16,6 +16,7 @@ use std::collections::BTreeSet; use synapse::push::{ evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules, + SimpleJsonValue, }; use test::Bencher; @@ -24,9 +25,18 @@ extern crate test; #[bench] fn bench_match_exact(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -43,6 +53,7 @@ fn bench_match_exact(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -63,9 +74,18 @@ fn bench_match_exact(b: &mut Bencher) { #[bench] fn bench_match_word(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -82,6 +102,7 @@ fn bench_match_word(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -102,9 +123,18 @@ fn bench_match_word(b: &mut Bencher) { #[bench] fn bench_match_word_miss(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -121,6 +151,7 @@ fn bench_match_word_miss(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -141,9 +172,18 @@ fn bench_match_word_miss(b: &mut Bencher) { #[bench] fn bench_eval_message(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -160,6 +200,7 @@ fn bench_eval_message(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index ec7a8c4453..dd6b4343ec 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -22,8 +22,8 @@ use regex::Regex; use super::{ utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType}, - Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition, - RelatedEventMatchCondition, + Action, Condition, EventMatchCondition, ExactEventMatchCondition, FilteredPushRules, + KnownCondition, RelatedEventMatchCondition, SimpleJsonValue, }; lazy_static! { @@ -61,9 +61,9 @@ impl RoomVersionFeatures { /// Allows running a set of push rules against a particular event. #[pyclass] pub struct PushRuleEvaluator { - /// A mapping of "flattened" keys to string values in the event, e.g. + /// A mapping of "flattened" keys to simple JSON values in the event, e.g. /// includes things like "type" and "content.msgtype". - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, /// The "content.body", if any. body: String, @@ -87,7 +87,7 @@ pub struct PushRuleEvaluator { /// The related events, indexed by relation type. Flattened in the same manner as /// `flattened_keys`. - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, /// If msc3664, push rules for related events, is enabled. related_event_match_enabled: bool, @@ -98,6 +98,9 @@ pub struct PushRuleEvaluator { /// If MSC3931 (room version feature flags) is enabled. Usually controlled by the same /// flag as MSC1767 (extensible events core). msc3931_enabled: bool, + + /// If MSC3758 (exact_event_match push rule condition) is enabled. + msc3758_exact_event_match: bool, } #[pymethods] @@ -106,22 +109,23 @@ impl PushRuleEvaluator { #[allow(clippy::too_many_arguments)] #[new] pub fn py_new( - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, has_mentions: bool, user_mentions: BTreeSet, room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, related_event_match_enabled: bool, room_version_feature_flags: Vec, msc3931_enabled: bool, + msc3758_exact_event_match: bool, ) -> Result { - let body = flattened_keys - .get("content.body") - .cloned() - .unwrap_or_default(); + let body = match flattened_keys.get("content.body") { + Some(SimpleJsonValue::Str(s)) => s.clone(), + _ => String::new(), + }; Ok(PushRuleEvaluator { flattened_keys, @@ -136,6 +140,7 @@ impl PushRuleEvaluator { related_event_match_enabled, room_version_feature_flags, msc3931_enabled, + msc3758_exact_event_match, }) } @@ -252,6 +257,9 @@ impl PushRuleEvaluator { KnownCondition::EventMatch(event_match) => { self.match_event_match(event_match, user_id)? } + KnownCondition::ExactEventMatch(exact_event_match) => { + self.match_exact_event_match(exact_event_match)? + } KnownCondition::RelatedEventMatch(event_match) => { self.match_related_event_match(event_match, user_id)? } @@ -337,7 +345,9 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) { + let haystack = if let Some(SimpleJsonValue::Str(haystack)) = + self.flattened_keys.get(&*event_match.key) + { haystack } else { return Ok(false); @@ -355,6 +365,27 @@ impl PushRuleEvaluator { compiled_pattern.is_match(haystack) } + /// Evaluates a `exact_event_match` condition. (MSC3758) + fn match_exact_event_match( + &self, + exact_event_match: &ExactEventMatchCondition, + ) -> Result { + // First check if the feature is enabled. + if !self.msc3758_exact_event_match { + return Ok(false); + } + + let value = &exact_event_match.value; + + let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) { + haystack + } else { + return Ok(false); + }; + + Ok(haystack == &**value) + } + /// Evaluates a `related_event_match` condition. (MSC3664) fn match_related_event_match( &self, @@ -410,7 +441,7 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(haystack) = event.get(&**key) { + let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) { haystack } else { return Ok(false); @@ -455,7 +486,10 @@ impl PushRuleEvaluator { #[test] fn push_rule_evaluator() { let mut flattened_keys = BTreeMap::new(); - flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); + flattened_keys.insert( + "content.body".to_string(), + SimpleJsonValue::Str("foo bar bob hello".to_string()), + ); let evaluator = PushRuleEvaluator::py_new( flattened_keys, false, @@ -468,6 +502,7 @@ fn push_rule_evaluator() { true, vec![], true, + true, ) .unwrap(); @@ -482,7 +517,10 @@ fn test_requires_room_version_supports_condition() { use crate::push::{PushRule, PushRules}; let mut flattened_keys = BTreeMap::new(); - flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); + flattened_keys.insert( + "content.body".to_string(), + SimpleJsonValue::Str("foo bar bob hello".to_string()), + ); let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( flattened_keys, @@ -496,6 +534,7 @@ fn test_requires_room_version_supports_condition() { false, flags, true, + true, ) .unwrap(); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 3c4f876cab..79e519fe11 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -56,7 +56,9 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use anyhow::{Context, Error}; use log::warn; +use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; +use pyo3::types::{PyBool, PyLong, PyString}; use pythonize::{depythonize, pythonize}; use serde::de::Error as _; use serde::{Deserialize, Serialize}; @@ -248,6 +250,36 @@ impl<'de> Deserialize<'de> for Action { } } +/// A simple JSON values (string, int, boolean, or null). +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum SimpleJsonValue { + Str(String), + Int(i64), + Bool(bool), + Null, +} + +impl<'source> FromPyObject<'source> for SimpleJsonValue { + fn extract(ob: &'source PyAny) -> PyResult { + if let Ok(s) = ::try_from(ob) { + Ok(SimpleJsonValue::Str(s.to_string())) + // A bool *is* an int, ensure we try bool first. + } else if let Ok(b) = ::try_from(ob) { + Ok(SimpleJsonValue::Bool(b.extract()?)) + } else if let Ok(i) = ::try_from(ob) { + Ok(SimpleJsonValue::Int(i.extract()?)) + } else if ob.is_none() { + Ok(SimpleJsonValue::Null) + } else { + Err(PyTypeError::new_err(format!( + "Can't convert from {} to SimpleJsonValue", + ob.get_type().name()? + ))) + } + } +} + /// A condition used in push rules to match against an event. /// /// We need this split as `serde` doesn't give us the ability to have a @@ -267,6 +299,8 @@ pub enum Condition { #[serde(tag = "kind")] pub enum KnownCondition { EventMatch(EventMatchCondition), + #[serde(rename = "com.beeper.msc3758.exact_event_match")] + ExactEventMatch(ExactEventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] @@ -309,6 +343,13 @@ pub struct EventMatchCondition { pub pattern_type: Option>, } +/// The body of a [`Condition::ExactEventMatch`] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ExactEventMatchCondition { + pub key: Cow<'static, str>, + pub value: Cow<'static, SimpleJsonValue>, +} + /// The body of a [`Condition::RelatedEventMatch`] #[derive(Serialize, Deserialize, Debug, Clone)] pub struct RelatedEventMatchCondition { @@ -542,6 +583,48 @@ fn test_deserialize_unstable_msc3931_condition() { )); } +#[test] +fn test_deserialize_unstable_msc3758_condition() { + // A string condition should work. + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":"foo"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // A boolean condition should work. + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":true}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // An integer condition should work. + let json = r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":1}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // A null condition should work + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":null}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); +} + #[test] fn test_deserialize_unstable_msc3952_user_condition() { let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 754acab2f9..328f681a29 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -14,7 +14,7 @@ from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union -from synapse.types import JsonDict +from synapse.types import JsonDict, SimpleJsonValue class PushRule: @property @@ -56,17 +56,18 @@ def get_base_rule_ids() -> Collection[str]: ... class PushRuleEvaluator: def __init__( self, - flattened_keys: Mapping[str, str], + flattened_keys: Mapping[str, SimpleJsonValue], has_mentions: bool, user_mentions: Set[str], room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], - related_events_flattened: Mapping[str, Mapping[str, str]], + related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]], related_event_match_enabled: bool, room_version_feature_flags: Tuple[str, ...], msc3931_enabled: bool, + msc3758_exact_event_match: bool, ): ... def run( self, diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 5e3a889081..6ac2f0c10d 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -169,6 +169,11 @@ class ExperimentalConfig(Config): # MSC3925: do not replace events with their edits self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False) + # MSC3758: exact_event_match push rule condition + self.msc3758_exact_event_match = experimental.get( + "msc3758_exact_event_match", False + ) + # MSC3873: Disambiguate event_match keys. self.msc3783_escape_event_match_key = experimental.get( "msc3783_escape_event_match_key", False diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 39d2f88f03..8568aca528 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -43,6 +43,7 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator +from synapse.types import SimpleJsonValue from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func @@ -256,13 +257,15 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level - async def _related_events(self, event: EventBase) -> Dict[str, Dict[str, str]]: + async def _related_events( + self, event: EventBase + ) -> Dict[str, Dict[str, SimpleJsonValue]]: """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation Returns: Mapping of relation type to flattened events. """ - related_events: Dict[str, Dict[str, str]] = {} + related_events: Dict[str, Dict[str, SimpleJsonValue]] = {} if self._related_event_match_enabled: related_event_id = event.content.get("m.relates_to", {}).get("event_id") relation_type = event.content.get("m.relates_to", {}).get("rel_type") @@ -425,6 +428,7 @@ class BulkPushRuleEvaluator: self._related_event_match_enabled, event.room_version.msc3931_push_features, self.hs.config.experimental.msc1767_enabled, # MSC3931 flag + self.hs.config.experimental.msc3758_exact_event_match, ) users = rules_by_user.keys() @@ -501,15 +505,15 @@ StateGroup = Union[object, int] def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, - result: Optional[Dict[str, str]] = None, + result: Optional[Dict[str, SimpleJsonValue]] = None, *, msc3783_escape_event_match_key: bool = False, -) -> Dict[str, str]: +) -> Dict[str, SimpleJsonValue]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, flatten it into a single layer dictionary by combining the keys & sub-keys. - Any (non-dictionary), non-string value is dropped. + String, integer, boolean, and null values are kept. All others are dropped. Transforms: @@ -538,8 +542,8 @@ def _flatten_dict( # nested fields. key = key.replace("\\", "\\\\").replace(".", "\\.") - if isinstance(value, str): - result[".".join(prefix + [key])] = value.lower() + if isinstance(value, (bool, str)) or type(value) is int or value is None: + result[".".join(prefix + [key])] = value elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below _flatten_dict( diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index f82d1cfc29..52e366c8ae 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -69,6 +69,8 @@ StateMap = Mapping[StateKey, T] MutableStateMap = MutableMapping[StateKey, T] # JSON types. These could be made stronger, but will do for now. +# A "simple" (canonical) JSON value. +SimpleJsonValue = Optional[Union[str, int, bool]] # A JSON-serialisable dict. JsonDict = Dict[str, Any] # A JSON-serialisable mapping; roughly speaking an immutable JSONDict. diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 516b65cc3c..6603447341 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -57,7 +57,7 @@ class FlattenDictTestCase(unittest.TestCase): ) def test_non_string(self) -> None: - """Non-string items are dropped.""" + """Booleans, ints, and nulls should be kept while other items are dropped.""" input: Dict[str, Any] = { "woo": "woo", "foo": True, @@ -66,7 +66,9 @@ class FlattenDictTestCase(unittest.TestCase): "fuzz": [], "boo": {}, } - self.assertEqual({"woo": "woo"}, _flatten_dict(input)) + self.assertEqual( + {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input) + ) def test_event(self) -> None: """Events can also be flattened.""" @@ -86,9 +88,9 @@ class FlattenDictTestCase(unittest.TestCase): ) expected = { "content.msgtype": "m.text", - "content.body": "hello world!", + "content.body": "Hello world!", "content.format": "org.matrix.custom.html", - "content.formatted_body": "

hello world!

", + "content.formatted_body": "

Hello world!

", "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", @@ -166,6 +168,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): related_event_match_enabled=True, room_version_feature_flags=event.room_version.msc3931_push_features, msc3931_enabled=True, + msc3758_exact_event_match=True, ) def test_display_name(self) -> None: @@ -410,6 +413,142 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "pattern should not match before a newline", ) + def test_exact_event_match_string(self) -> None: + """Check that exact_event_match conditions work as expected for strings.""" + + # Test against a string value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": "foobaz", + } + self._assert_matches( + condition, + {"value": "foobaz"}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": "FoobaZ"}, + "values should match and be case-sensitive", + ) + self._assert_not_matches( + condition, + {"value": "test foobaz test"}, + "values must exactly match", + ) + value: Any + for value in (True, False, 1, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + # it should work on frozendicts too + self._assert_matches( + condition, + frozendict.frozendict({"value": "foobaz"}), + "values should match on frozendicts", + ) + + def test_exact_event_match_boolean(self) -> None: + """Check that exact_event_match conditions work as expected for booleans.""" + + # Test against a True boolean value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": True, + } + self._assert_matches( + condition, + {"value": True}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": False}, + "incorrect values should not match", + ) + for value in ("foobaz", 1, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + # Test against a False boolean value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": False, + } + self._assert_matches( + condition, + {"value": False}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": True}, + "incorrect values should not match", + ) + # Choose false-y values to ensure there's no type coercion. + for value in ("", 0, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + def test_exact_event_match_null(self) -> None: + """Check that exact_event_match conditions work as expected for null.""" + + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": None, + } + self._assert_matches( + condition, + {"value": None}, + "exact value should match", + ) + for value in ("foobaz", True, False, 1, 1.1, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + def test_exact_event_match_integer(self) -> None: + """Check that exact_event_match conditions work as expected for integers.""" + + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": 1, + } + self._assert_matches( + condition, + {"value": 1}, + "exact value should match", + ) + value: Any + for value in (1.1, -1, 0): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect values should not match", + ) + for value in ("1", True, False, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + def test_no_body(self) -> None: """Not having a body shouldn't break the evaluator.""" evaluator = self._get_evaluator({}) -- cgit 1.5.1 From d0c713cc85f094c323b2ba3f02d8ac411a7f0705 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 10 Feb 2023 23:29:00 +0000 Subject: Return read-only collections from `@cached` methods (#13755) It's important that collections returned from `@cached` methods are not modified, otherwise future retrievals from the cache will return the modified collection. This applies to the return values from `@cached` methods and the values inside the dictionaries returned by `@cachedList` methods. It's not necessary for the dictionaries returned by `@cachedList` methods themselves to be read-only. Signed-off-by: Sean Quah Co-authored-by: David Robertson --- changelog.d/13755.misc | 1 + synapse/app/phone_stats_home.py | 4 ++-- synapse/config/room_directory.py | 6 +++--- synapse/events/builder.py | 6 +++--- synapse/federation/federation_server.py | 3 ++- synapse/handlers/directory.py | 6 +++--- synapse/handlers/receipts.py | 4 ++-- synapse/handlers/room.py | 2 +- synapse/handlers/sync.py | 4 ++-- synapse/push/bulk_push_rule_evaluator.py | 4 ++-- synapse/state/__init__.py | 2 +- synapse/storage/controllers/state.py | 6 +++--- synapse/storage/databases/main/account_data.py | 2 +- synapse/storage/databases/main/appservice.py | 2 +- synapse/storage/databases/main/devices.py | 17 +++++++++------ synapse/storage/databases/main/directory.py | 4 ++-- synapse/storage/databases/main/end_to_end_keys.py | 25 +++++++++++++--------- synapse/storage/databases/main/event_federation.py | 11 ++++++---- .../storage/databases/main/monthly_active_users.py | 4 ++-- synapse/storage/databases/main/receipts.py | 10 +++++---- synapse/storage/databases/main/registration.py | 4 ++-- synapse/storage/databases/main/relations.py | 7 ++++-- synapse/storage/databases/main/roommember.py | 19 ++++++++-------- synapse/storage/databases/main/signatures.py | 6 +++--- synapse/storage/databases/main/tags.py | 8 ++++--- synapse/storage/databases/main/user_directory.py | 4 ++-- tests/rest/admin/test_server_notice.py | 4 ++-- 27 files changed, 98 insertions(+), 77 deletions(-) create mode 100644 changelog.d/13755.misc (limited to 'synapse') diff --git a/changelog.d/13755.misc b/changelog.d/13755.misc new file mode 100644 index 0000000000..662ee00e99 --- /dev/null +++ b/changelog.d/13755.misc @@ -0,0 +1 @@ +Re-type hint some collections as read-only. diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 53db1e85b3..897dd3edac 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -15,7 +15,7 @@ import logging import math import resource import sys -from typing import TYPE_CHECKING, List, Sized, Tuple +from typing import TYPE_CHECKING, List, Mapping, Sized, Tuple from prometheus_client import Gauge @@ -194,7 +194,7 @@ def start_phone_stats_home(hs: "HomeServer") -> None: @wrap_as_background_process("generate_monthly_active_users") async def generate_monthly_active_users() -> None: current_mau_count = 0 - current_mau_count_by_service = {} + current_mau_count_by_service: Mapping[str, int] = {} reserved_users: Sized = () store = hs.get_datastores().main if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only: diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 3ed236217f..8666c22f01 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, List +from typing import Any, Collection from matrix_common.regex import glob_to_regex @@ -70,7 +70,7 @@ class RoomDirectoryConfig(Config): return False def is_publishing_room_allowed( - self, user_id: str, room_id: str, aliases: List[str] + self, user_id: str, room_id: str, aliases: Collection[str] ) -> bool: """Checks if the given user is allowed to publish the room @@ -122,7 +122,7 @@ class _RoomDirectoryRule: except Exception as e: raise ConfigError("Failed to parse glob into regex") from e - def matches(self, user_id: str, room_id: str, aliases: List[str]) -> bool: + def matches(self, user_id: str, room_id: str, aliases: Collection[str]) -> bool: """Tests if this rule matches the given user_id, room_id and aliases. Args: diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 94dd1298e1..c82745275f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union import attr from signedjson.types import SigningKey @@ -103,7 +103,7 @@ class EventBuilder: async def build( self, - prev_event_ids: List[str], + prev_event_ids: Collection[str], auth_event_ids: Optional[List[str]], depth: Optional[int] = None, ) -> EventBase: @@ -136,7 +136,7 @@ class EventBuilder: format_version = self.room_version.event_format # The types of auth/prev events changes between event versions. - prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] + prev_events: Union[Collection[str], List[Tuple[str, Dict[str, str]]]] auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] if format_version == EventFormatVersions.ROOM_V1_V2: auth_events = await self._store.add_event_hashes(auth_event_ids) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 8d36172484..6addc0bb65 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -23,6 +23,7 @@ from typing import ( Collection, Dict, List, + Mapping, Optional, Tuple, Union, @@ -1512,7 +1513,7 @@ class FederationHandlerRegistry: def _get_event_ids_for_partial_state_join( join_event: EventBase, prev_state_ids: StateMap[str], - summary: Dict[str, MemberSummary], + summary: Mapping[str, MemberSummary], ) -> Collection[str]: """Calculate state to be returned in a partial_state send_join diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index d31b0fbb17..a5798e9483 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -14,7 +14,7 @@ import logging import string -from typing import TYPE_CHECKING, Iterable, List, Optional +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence from typing_extensions import Literal @@ -486,7 +486,7 @@ class DirectoryHandler: ) if canonical_alias: # Ensure we do not mutate room_aliases. - room_aliases = room_aliases + [canonical_alias] + room_aliases = list(room_aliases) + [canonical_alias] if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases @@ -529,7 +529,7 @@ class DirectoryHandler: async def get_aliases_for_room( self, requester: Requester, room_id: str - ) -> List[str]: + ) -> Sequence[str]: """ Get a list of the aliases that currently point to this room on this server """ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 04c61ae3dd..2bacdebfb5 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Tuple from synapse.api.constants import EduTypes, ReceiptTypes from synapse.appservice import ApplicationService @@ -189,7 +189,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]): @staticmethod def filter_out_private_receipts( - rooms: List[JsonDict], user_id: str + rooms: Sequence[JsonDict], user_id: str ) -> List[JsonDict]: """ Filters a list of serialized receipts (as returned by /sync and /initialSync) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 0e759b8a5d..060bbcb181 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1928,6 +1928,6 @@ class RoomShutdownHandler: return { "kicked_users": kicked_users, "failed_to_kick_users": failed_to_kick_users, - "local_aliases": aliases_for_room, + "local_aliases": list(aliases_for_room), "new_room_id": new_room_id, } diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 399685e5b7..4bae46158a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1519,7 +1519,7 @@ class SyncHandler: one_time_keys_count = await self.store.count_e2e_one_time_keys( user_id, device_id ) - unused_fallback_key_types = ( + unused_fallback_key_types = list( await self.store.get_e2e_unused_fallback_key_types(user_id, device_id) ) @@ -2301,7 +2301,7 @@ class SyncHandler: sync_result_builder: "SyncResultBuilder", room_builder: "RoomSyncResultBuilder", ephemeral: List[JsonDict], - tags: Optional[Dict[str, Dict[str, Any]]], + tags: Optional[Mapping[str, Mapping[str, Any]]], account_data: Mapping[str, JsonDict], always_include: bool = False, ) -> None: diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 8568aca528..f6a5bffb0f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -22,6 +22,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -149,7 +150,7 @@ class BulkPushRuleEvaluator: # little, we can skip fetching a huge number of push rules in large rooms. # This helps make joins and leaves faster. if event.type == EventTypes.Member: - local_users = [] + local_users: Sequence[str] = [] # We never notify a user about their own actions. This is enforced in # `_action_for_event_by_user` in the loop over `rules_by_user`, but we # do the same check here to avoid unnecessary DB queries. @@ -184,7 +185,6 @@ class BulkPushRuleEvaluator: if event.type == EventTypes.Member and event.membership == Membership.INVITE: invited = event.state_key if invited and self.hs.is_mine_id(invited) and invited not in local_users: - local_users = list(local_users) local_users.append(invited) if not local_users: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index e877e6f1a1..4dc25df67e 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -226,7 +226,7 @@ class StateHandler: return await ret.get_state(self._state_storage_controller, state_filter) async def get_current_user_ids_in_room( - self, room_id: str, latest_event_ids: List[str] + self, room_id: str, latest_event_ids: Collection[str] ) -> Set[str]: """ Get the users IDs who are currently in a room. diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 52efd4a171..9d7a8a792f 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -14,6 +14,7 @@ import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Awaitable, Callable, @@ -23,7 +24,6 @@ from typing import ( List, Mapping, Optional, - Set, Tuple, ) @@ -527,7 +527,7 @@ class StateStorageController: ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]: """Get current hosts in room based on current state. Blocks until we have full state for the given room. This only happens for rooms @@ -584,7 +584,7 @@ class StateStorageController: async def get_users_in_room_with_profiles( self, room_id: str - ) -> Dict[str, ProfileInfo]: + ) -> Mapping[str, ProfileInfo]: """ Get the current users in the room with their profiles. If the room is currently partial-stated, this will block until the room has diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 2d6f02c14f..95567826f2 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -240,7 +240,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached(num_args=2, tree=True) async def get_account_data_for_room( self, user_id: str, room_id: str - ) -> Dict[str, JsonDict]: + ) -> Mapping[str, JsonDict]: """Get all the client account_data for a user for a room. Args: diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 5fb152c4ff..484db175d0 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -166,7 +166,7 @@ class ApplicationServiceWorkerStore(RoomMemberWorkerStore): room_id: str, app_service: "ApplicationService", cache_context: _CacheContext, - ) -> List[str]: + ) -> Sequence[str]: """ Get all users in a room that the appservice controls. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 85c1778a81..1ca66d57d4 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -21,6 +21,7 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, Set, Tuple, @@ -202,7 +203,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): def get_device_stream_token(self) -> int: return self._device_list_id_gen.get_current_token() - async def count_devices_by_users(self, user_ids: Optional[List[str]] = None) -> int: + async def count_devices_by_users( + self, user_ids: Optional[Collection[str]] = None + ) -> int: """Retrieve number of all devices of given users. Only returns number of devices that are not marked as hidden. @@ -213,7 +216,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): """ def count_devices_by_users_txn( - txn: LoggingTransaction, user_ids: List[str] + txn: LoggingTransaction, user_ids: Collection[str] ) -> int: sql = """ SELECT count(*) @@ -747,7 +750,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): @cancellable async def get_user_devices_from_cache( self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]] - ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: + ) -> Tuple[Set[str], Dict[str, Mapping[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. Args: @@ -775,16 +778,18 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): user_ids_not_in_cache = unique_user_ids - user_ids_in_cache # First fetch all the users which all devices are to be returned. - results: Dict[str, Dict[str, JsonDict]] = {} + results: Dict[str, Mapping[str, JsonDict]] = {} for user_id in user_ids: if user_id in user_ids_in_cache: results[user_id] = await self.get_cached_devices_for_user(user_id) # Then fetch all device-specific requests, but skip users we've already # fetched all devices for. + device_specific_results: Dict[str, Dict[str, JsonDict]] = {} for user_id, device_id in user_and_device_ids: if user_id in user_ids_in_cache and user_id not in user_ids: device = await self._get_cached_user_device(user_id, device_id) - results.setdefault(user_id, {})[device_id] = device + device_specific_results.setdefault(user_id, {})[device_id] = device + results.update(device_specific_results) set_tag("in_cache", str(results)) set_tag("not_in_cache", str(user_ids_not_in_cache)) @@ -802,7 +807,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): return db_to_json(content) @cached() - async def get_cached_devices_for_user(self, user_id: str) -> Dict[str, JsonDict]: + async def get_cached_devices_for_user(self, user_id: str) -> Mapping[str, JsonDict]: devices = await self.db_pool.simple_select_list( table="device_lists_remote_cache", keyvalues={"user_id": user_id}, diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py index 5903fdaf00..44aa181174 100644 --- a/synapse/storage/databases/main/directory.py +++ b/synapse/storage/databases/main/directory.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, List, Optional, Tuple +from typing import Iterable, List, Optional, Sequence, Tuple import attr @@ -74,7 +74,7 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore): ) @cached(max_entries=5000) - async def get_aliases_for_room(self, room_id: str) -> List[str]: + async def get_aliases_for_room(self, room_id: str) -> Sequence[str]: return await self.db_pool.simple_select_onecol( "room_aliases", {"room_id": room_id}, diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index c4ac6c33ba..752dc16e17 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -20,7 +20,9 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, + Sequence, Tuple, Union, cast, @@ -691,7 +693,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker @cached(max_entries=10000) async def get_e2e_unused_fallback_key_types( self, user_id: str, device_id: str - ) -> List[str]: + ) -> Sequence[str]: """Returns the fallback key types that have an unused key. Args: @@ -731,7 +733,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return user_keys.get(key_type) @cached(num_args=1) - def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Dict[str, JsonDict]: + def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Mapping[str, JsonDict]: """Dummy function. Only used to make a cache for _get_bare_e2e_cross_signing_keys_bulk. """ @@ -744,7 +746,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) async def _get_bare_e2e_cross_signing_keys_bulk( self, user_ids: Iterable[str] - ) -> Dict[str, Optional[Dict[str, JsonDict]]]: + ) -> Dict[str, Optional[Mapping[str, JsonDict]]]: """Returns the cross-signing keys for a set of users. The output of this function should be passed to _get_e2e_cross_signing_signatures_txn if the signatures for the calling user need to be fetched. @@ -765,7 +767,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) # The `Optional` comes from the `@cachedList` decorator. - return cast(Dict[str, Optional[Dict[str, JsonDict]]], result) + return cast(Dict[str, Optional[Mapping[str, JsonDict]]], result) def _get_bare_e2e_cross_signing_keys_bulk_txn( self, @@ -924,7 +926,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker @cancellable async def get_e2e_cross_signing_keys_bulk( self, user_ids: List[str], from_user_id: Optional[str] = None - ) -> Dict[str, Optional[Dict[str, JsonDict]]]: + ) -> Dict[str, Optional[Mapping[str, JsonDict]]]: """Returns the cross-signing keys for a set of users. Args: @@ -940,11 +942,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker result = await self._get_bare_e2e_cross_signing_keys_bulk(user_ids) if from_user_id: - result = await self.db_pool.runInteraction( - "get_e2e_cross_signing_signatures", - self._get_e2e_cross_signing_signatures_txn, - result, - from_user_id, + result = cast( + Dict[str, Optional[Mapping[str, JsonDict]]], + await self.db_pool.runInteraction( + "get_e2e_cross_signing_signatures", + self._get_e2e_cross_signing_signatures_txn, + result, + from_user_id, + ), ) return result diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index bbee02ab18..ca780cca36 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -22,6 +22,7 @@ from typing import ( Iterable, List, Optional, + Sequence, Set, Tuple, cast, @@ -1004,7 +1005,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) - async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: + async def get_max_depth_of( + self, event_ids: Collection[str] + ) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the max depth from a set of event IDs Args: @@ -1141,7 +1144,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) @cached(max_entries=5000, iterable=True) - async def get_latest_event_ids_in_room(self, room_id: str) -> List[str]: + async def get_latest_event_ids_in_room(self, room_id: str) -> Sequence[str]: return await self.db_pool.simple_select_onecol( table="event_forward_extremities", keyvalues={"room_id": room_id}, @@ -1171,7 +1174,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas @cancellable async def get_forward_extremities_for_room_at_stream_ordering( self, room_id: str, stream_ordering: int - ) -> List[str]: + ) -> Sequence[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -1204,7 +1207,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas @cached(max_entries=5000, num_args=2) async def _get_forward_extremeties_for_room( self, room_id: str, stream_ordering: int - ) -> List[str]: + ) -> Sequence[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index db9a24db5e..4b1061e6d7 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, cast from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage.database import ( @@ -95,7 +95,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore): return await self.db_pool.runInteraction("count_users", _count_users) @cached(num_args=0) - async def get_monthly_active_count_by_service(self) -> Dict[str, int]: + async def get_monthly_active_count_by_service(self) -> Mapping[str, int]: """Generates current count of monthly active users broken down by service. A service is typically an appservice but also includes native matrix users. Since the `monthly_active_users` table is populated from the `user_ips` table diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 29972d5204..dddf49c2d5 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -21,7 +21,9 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, + Sequence, Tuple, cast, ) @@ -288,7 +290,7 @@ class ReceiptsWorkerStore(SQLBaseStore): async def get_linearized_receipts_for_room( self, room_id: str, to_key: int, from_key: Optional[int] = None - ) -> List[dict]: + ) -> Sequence[JsonDict]: """Get receipts for a single room for sending to clients. Args: @@ -311,7 +313,7 @@ class ReceiptsWorkerStore(SQLBaseStore): @cached(tree=True) async def _get_linearized_receipts_for_room( self, room_id: str, to_key: int, from_key: Optional[int] = None - ) -> List[JsonDict]: + ) -> Sequence[JsonDict]: """See get_linearized_receipts_for_room""" def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: @@ -354,7 +356,7 @@ class ReceiptsWorkerStore(SQLBaseStore): ) async def _get_linearized_receipts_for_rooms( self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None - ) -> Dict[str, List[JsonDict]]: + ) -> Dict[str, Sequence[JsonDict]]: if not room_ids: return {} @@ -416,7 +418,7 @@ class ReceiptsWorkerStore(SQLBaseStore): ) async def get_linearized_receipts_for_all_rooms( self, to_key: int, from_key: Optional[int] = None - ) -> Dict[str, JsonDict]: + ) -> Mapping[str, JsonDict]: """Get receipts for all rooms between two stream_ids, up to a limit of the latest 100 read receipts. diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 31f0f2bd3d..9a55e17624 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -16,7 +16,7 @@ import logging import random import re -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast import attr @@ -192,7 +192,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): ) @cached() - async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]: + async def get_user_by_id(self, user_id: str) -> Optional[Mapping[str, Any]]: """Deprecated: use get_userinfo_by_id instead""" def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 0018d6f7ab..fa3266c081 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -22,6 +22,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -171,7 +172,7 @@ class RelationsWorkerStore(SQLBaseStore): direction: Direction = Direction.BACKWARDS, from_token: Optional[StreamToken] = None, to_token: Optional[StreamToken] = None, - ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]: + ) -> Tuple[Sequence[_RelatedEvent], Optional[StreamToken]]: """Get a list of relations for an event, ordered by topological ordering. Args: @@ -397,7 +398,9 @@ class RelationsWorkerStore(SQLBaseStore): return result is not None @cached() - async def get_aggregation_groups_for_event(self, event_id: str) -> List[JsonDict]: + async def get_aggregation_groups_for_event( + self, event_id: str + ) -> Sequence[JsonDict]: raise NotImplementedError() @cachedList( diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ea6a5e2f34..694a5b802c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -24,6 +24,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -153,7 +154,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return self._known_servers_count @cached(max_entries=100000, iterable=True) - async def get_users_in_room(self, room_id: str) -> List[str]: + async def get_users_in_room(self, room_id: str) -> Sequence[str]: """Returns a list of users in the room. Will return inaccurate results for rooms with partial state, since the state for @@ -190,9 +191,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @cached() - def get_user_in_room_with_profile( - self, room_id: str, user_id: str - ) -> Dict[str, ProfileInfo]: + def get_user_in_room_with_profile(self, room_id: str, user_id: str) -> ProfileInfo: raise NotImplementedError() @cachedList( @@ -246,7 +245,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( self, room_id: str - ) -> Dict[str, ProfileInfo]: + ) -> Mapping[str, ProfileInfo]: """Get a mapping from user ID to profile information for all users in a given room. The profile information comes directly from this room's `m.room.member` @@ -285,7 +284,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @cached(max_entries=100000) - async def get_room_summary(self, room_id: str) -> Dict[str, MemberSummary]: + async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]: """Get the details of a room roughly suitable for use by the room summary extension to /sync. Useful when lazy loading room members. Args: @@ -357,7 +356,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached() async def get_invited_rooms_for_local_user( self, user_id: str - ) -> List[RoomsForUser]: + ) -> Sequence[RoomsForUser]: """Get all the rooms the *local* user is invited to. Args: @@ -475,7 +474,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results @cached(iterable=True) - async def get_local_users_in_room(self, room_id: str) -> List[str]: + async def get_local_users_in_room(self, room_id: str) -> Sequence[str]: """ Retrieves a list of the current roommembers who are local to the server. """ @@ -791,7 +790,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): """Returns the set of users who share a room with `user_id`""" room_ids = await self.get_rooms_for_user(user_id) - user_who_share_room = set() + user_who_share_room: Set[str] = set() for room_id in room_ids: user_ids = await self.get_users_in_room(room_id) user_who_share_room.update(user_ids) @@ -953,7 +952,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]: """Get current hosts in room based on current state.""" # First we check if we already have `get_users_in_room` in the cache, as diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py index 05da15074a..5dcb1fc0b5 100644 --- a/synapse/storage/databases/main/signatures.py +++ b/synapse/storage/databases/main/signatures.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Collection, Dict, List, Tuple +from typing import Collection, Dict, List, Mapping, Tuple from unpaddedbase64 import encode_base64 @@ -26,7 +26,7 @@ from synapse.util.caches.descriptors import cached, cachedList class SignatureWorkerStore(EventsWorkerStore): @cached() - def get_event_reference_hash(self, event_id: str) -> Dict[str, Dict[str, bytes]]: + def get_event_reference_hash(self, event_id: str) -> Mapping[str, bytes]: # This is a dummy function to allow get_event_reference_hashes # to use its cache raise NotImplementedError() @@ -36,7 +36,7 @@ class SignatureWorkerStore(EventsWorkerStore): ) async def get_event_reference_hashes( self, event_ids: Collection[str] - ) -> Dict[str, Dict[str, bytes]]: + ) -> Mapping[str, Mapping[str, bytes]]: """Get all hashes for given events. Args: diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index d5500cdd47..c149a9eacb 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import Any, Dict, Iterable, List, Tuple, cast +from typing import Any, Dict, Iterable, List, Mapping, Tuple, cast from synapse.api.constants import AccountDataTypes from synapse.replication.tcp.streams import AccountDataStream @@ -32,7 +32,9 @@ logger = logging.getLogger(__name__) class TagsWorkerStore(AccountDataWorkerStore): @cached() - async def get_tags_for_user(self, user_id: str) -> Dict[str, Dict[str, JsonDict]]: + async def get_tags_for_user( + self, user_id: str + ) -> Mapping[str, Mapping[str, JsonDict]]: """Get all the tags for a user. @@ -107,7 +109,7 @@ class TagsWorkerStore(AccountDataWorkerStore): async def get_updated_tags( self, user_id: str, stream_id: int - ) -> Dict[str, Dict[str, JsonDict]]: + ) -> Mapping[str, Mapping[str, JsonDict]]: """Get all the tags for the rooms where the tags have changed since the given version diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 14ef5b040d..f6a6fd4079 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -16,9 +16,9 @@ import logging import re from typing import ( TYPE_CHECKING, - Dict, Iterable, List, + Mapping, Optional, Sequence, Set, @@ -586,7 +586,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): ) @cached() - async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, str]]: + async def get_user_in_directory(self, user_id: str) -> Optional[Mapping[str, str]]: return await self.db_pool.simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py index a2f347f666..f71ff46d87 100644 --- a/tests/rest/admin/test_server_notice.py +++ b/tests/rest/admin/test_server_notice.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List +from typing import List, Sequence from twisted.test.proto_helpers import MemoryReactor @@ -558,7 +558,7 @@ class ServerNoticeTestCase(unittest.HomeserverTestCase): def _check_invite_and_join_status( self, user_id: str, expected_invites: int, expected_memberships: int - ) -> List[RoomsForUser]: + ) -> Sequence[RoomsForUser]: """Check invite and room membership status of a user. Args -- cgit 1.5.1 From 6cddf24e361fe43f086307c833cd814dc03363b6 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Sat, 11 Feb 2023 00:31:05 +0100 Subject: Faster joins: don't stall when a user joins during a fast join (#14606) Fixes #12801. Complement tests are at https://github.com/matrix-org/complement/pull/567. Avoid blocking on full state when handling a subsequent join into a partial state room. Also always perform a remote join into partial state rooms, since we do not know whether the joining user has been banned and want to avoid leaking history to banned users. Signed-off-by: Mathieu Velten Co-authored-by: Sean Quah Co-authored-by: David Robertson --- changelog.d/14606.misc | 1 + synapse/api/errors.py | 22 ++++++ synapse/federation/federation_server.py | 2 +- synapse/handlers/event_auth.py | 16 ++--- synapse/handlers/federation.py | 2 +- synapse/handlers/federation_event.py | 59 ++++++++++++++-- synapse/handlers/message.py | 2 +- synapse/handlers/room.py | 2 +- synapse/handlers/room_member.py | 118 ++++++++++++++++++++++--------- synapse/handlers/room_member_worker.py | 5 +- synapse/storage/databases/main/events.py | 21 +----- tests/handlers/test_federation.py | 40 +++++------ 12 files changed, 196 insertions(+), 94 deletions(-) create mode 100644 changelog.d/14606.misc (limited to 'synapse') diff --git a/changelog.d/14606.misc b/changelog.d/14606.misc new file mode 100644 index 0000000000..e2debc96d8 --- /dev/null +++ b/changelog.d/14606.misc @@ -0,0 +1 @@ +Faster joins: don't stall when another user joins during a fast join resync. diff --git a/synapse/api/errors.py b/synapse/api/errors.py index c2c177fd71..9235ce6536 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -751,3 +751,25 @@ class ModuleFailedException(Exception): Raised when a module API callback fails, for example because it raised an exception. """ + + +class PartialStateConflictError(SynapseError): + """An internal error raised when attempting to persist an event with partial state + after the room containing the event has been un-partial stated. + + This error should be handled by recomputing the event context and trying again. + + This error has an HTTP status code so that it can be transported over replication. + It should not be exposed to clients. + """ + + @staticmethod + def message() -> str: + return "Cannot persist partial state event in un-partial stated room" + + def __init__(self) -> None: + super().__init__( + HTTPStatus.CONFLICT, + msg=PartialStateConflictError.message(), + errcode=Codes.UNKNOWN, + ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 6addc0bb65..6d99845de5 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -48,6 +48,7 @@ from synapse.api.errors import ( FederationError, IncompatibleRoomVersionError, NotFoundError, + PartialStateConflictError, SynapseError, UnsupportedRoomVersionError, ) @@ -81,7 +82,6 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, ReplicationGetQueryRestServlet, ) -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index a23a8ce2a1..46dd63c3f0 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -202,7 +202,7 @@ class EventAuthHandler: state_ids: StateMap[str], room_version: RoomVersion, user_id: str, - prev_member_event: Optional[EventBase], + prev_membership: Optional[str], ) -> None: """ Check whether a user can join a room without an invite due to restricted join rules. @@ -214,15 +214,14 @@ class EventAuthHandler: state_ids: The state of the room as it currently is. room_version: The room version of the room being joined. user_id: The user joining the room. - prev_member_event: The current membership event for this user. + prev_membership: The current membership state for this user. `None` if the + user has never joined the room (equivalent to "leave"). Raises: AuthError if the user cannot join the room. """ # If the member is invited or currently joined, then nothing to do. - if prev_member_event and ( - prev_member_event.membership in (Membership.JOIN, Membership.INVITE) - ): + if prev_membership in (Membership.JOIN, Membership.INVITE): return # This is not a room with a restricted join rule, so we don't need to do the @@ -255,13 +254,14 @@ class EventAuthHandler: ) async def has_restricted_join_rules( - self, state_ids: StateMap[str], room_version: RoomVersion + self, partial_state_ids: StateMap[str], room_version: RoomVersion ) -> bool: """ Return if the room has the proper join rules set for access via rooms. Args: - state_ids: The state of the room as it currently is. + state_ids: The state of the room as it currently is. May be full or partial + state. room_version: The room version of the room to query. Returns: @@ -272,7 +272,7 @@ class EventAuthHandler: return False # If there's no join rule, then it defaults to invite (so this doesn't apply). - join_rules_event_id = state_ids.get((EventTypes.JoinRules, ""), None) + join_rules_event_id = partial_state_ids.get((EventTypes.JoinRules, ""), None) if not join_rules_event_id: return False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 43ed4a3dd1..08727e4857 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -49,6 +49,7 @@ from synapse.api.errors import ( FederationPullAttemptBackoffError, HttpResponseException, NotFoundError, + PartialStateConflictError, RequestSendFailed, SynapseError, ) @@ -68,7 +69,6 @@ from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, StrCollection, get_domain_from_id from synapse.types.state import StateFilter diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 3561f2f1de..b7136f8d1c 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -47,6 +47,7 @@ from synapse.api.errors import ( FederationError, FederationPullAttemptBackoffError, HttpResponseException, + PartialStateConflictError, RequestSendFailed, SynapseError, ) @@ -74,7 +75,6 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) from synapse.state import StateResolutionStore -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( PersistedEventPosition, @@ -441,16 +441,17 @@ class FederationEventHandler: # Check if the user is already in the room or invited to the room. user_id = event.state_key prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None) - prev_member_event = None + prev_membership = None if prev_member_event_id: prev_member_event = await self._store.get_event(prev_member_event_id) + prev_membership = prev_member_event.membership # Check if the member should be allowed access via membership in a space. await self._event_auth_handler.check_restricted_join_rules( prev_state_ids, event.room_version, user_id, - prev_member_event, + prev_membership, ) @trace @@ -526,11 +527,57 @@ class FederationEventHandler: "Peristing join-via-remote %s (partial_state: %s)", event, partial_state ) with nested_logging_context(suffix=event.event_id): + if partial_state: + # When handling a second partial state join into a partial state room, + # the returned state will exclude the membership from the first join. To + # preserve prior memberships, we try to compute the partial state before + # the event ourselves if we know about any of the prev events. + # + # When we don't know about any of the prev events, it's fine to just use + # the returned state, since the new join will create a new forward + # extremity, and leave the forward extremity containing our prior + # memberships alone. + prev_event_ids = set(event.prev_event_ids()) + seen_event_ids = await self._store.have_events_in_timeline( + prev_event_ids + ) + missing_event_ids = prev_event_ids - seen_event_ids + + state_maps_to_resolve: List[StateMap[str]] = [] + + # Fetch the state after the prev events that we know about. + state_maps_to_resolve.extend( + ( + await self._state_storage_controller.get_state_groups_ids( + room_id, seen_event_ids, await_full_state=False + ) + ).values() + ) + + # When there are prev events we do not have the state for, we state + # resolve with the state returned by the remote homeserver. + if missing_event_ids or len(state_maps_to_resolve) == 0: + state_maps_to_resolve.append( + {(e.type, e.state_key): e.event_id for e in state} + ) + + state_ids_before_event = ( + await self._state_resolution_handler.resolve_events_with_store( + event.room_id, + room_version.identifier, + state_maps_to_resolve, + event_map=None, + state_res_store=StateResolutionStore(self._store), + ) + ) + else: + state_ids_before_event = { + (e.type, e.state_key): e.event_id for e in state + } + context = await self._state_handler.compute_event_context( event, - state_ids_before_event={ - (e.type, e.state_key): e.event_id for e in state - }, + state_ids_before_event=state_ids_before_event, partial_state=partial_state, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3e30f52e4d..8f5b658d9d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -38,6 +38,7 @@ from synapse.api.errors import ( Codes, ConsentNotGivenError, NotFoundError, + PartialStateConflictError, ShadowBanError, SynapseError, UnstableSpecAuthError, @@ -57,7 +58,6 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.replication.http.send_events import ReplicationSendEventsRestServlet -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( MutableStateMap, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 060bbcb181..837dabb3b7 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -43,6 +43,7 @@ from synapse.api.errors import ( Codes, LimitExceededError, NotFoundError, + PartialStateConflictError, StoreError, SynapseError, ) @@ -54,7 +55,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.streams import EventSource from synapse.types import ( JsonDict, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6e7141d2ef..a965c7ec76 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -26,7 +26,13 @@ from synapse.api.constants import ( GuestAccess, Membership, ) -from synapse.api.errors import AuthError, Codes, ShadowBanError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + PartialStateConflictError, + ShadowBanError, + SynapseError, +) from synapse.api.ratelimiting import Ratelimiter from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase @@ -34,7 +40,6 @@ from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.logging import opentracing from synapse.module_api import NOT_SPAM -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.types import ( JsonDict, Requester, @@ -56,6 +61,13 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +class NoKnownServersError(SynapseError): + """No server already resident to the room was provided to the join/knock operation.""" + + def __init__(self, msg: str = "No known servers"): + super().__init__(404, msg) + + class RoomMemberHandler(metaclass=abc.ABCMeta): # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level @@ -185,6 +197,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id: Room that we are trying to join user: User who is trying to join content: A dict that should be used as the content of the join event. + + Raises: + NoKnownServersError: if remote_room_hosts does not contain a server joined to + the room. """ raise NotImplementedError() @@ -823,14 +839,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): latest_event_ids = await self.store.get_prev_events_for_room(room_id) - state_before_join = await self.state_handler.compute_state_after_events( - room_id, latest_event_ids + is_partial_state_room = await self.store.is_partial_state_room(room_id) + partial_state_before_join = await self.state_handler.compute_state_after_events( + room_id, latest_event_ids, await_full_state=False ) + # `is_partial_state_room` also indicates whether `partial_state_before_join` is + # partial. # TODO: Refactor into dictionary of explicitly allowed transitions # between old and new state, with specific error messages for some # transitions and generic otherwise - old_state_id = state_before_join.get((EventTypes.Member, target.to_string())) + old_state_id = partial_state_before_join.get( + (EventTypes.Member, target.to_string()) + ) if old_state_id: old_state = await self.store.get_event(old_state_id, allow_none=True) old_membership = old_state.content.get("membership") if old_state else None @@ -881,11 +902,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if action == "kick": raise AuthError(403, "The target user is not in the room") - is_host_in_room = await self._is_host_in_room(state_before_join) + is_host_in_room = await self._is_host_in_room(partial_state_before_join) if effective_membership_state == Membership.JOIN: if requester.is_guest: - guest_can_join = await self._can_guest_join(state_before_join) + guest_can_join = await self._can_guest_join(partial_state_before_join) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -927,8 +948,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id, remote_room_hosts, content, + is_partial_state_room, is_host_in_room, - state_before_join, + partial_state_before_join, ) if remote_join: if ratelimit: @@ -1073,8 +1095,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id: str, remote_room_hosts: List[str], content: JsonDict, + is_partial_state_room: bool, is_host_in_room: bool, - state_before_join: StateMap[str], + partial_state_before_join: StateMap[str], ) -> Tuple[bool, List[str]]: """ Check whether the server should do a remote join (as opposed to a local @@ -1093,9 +1116,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): remote_room_hosts: A list of remote room hosts. content: The content to use as the event body of the join. This may be modified. - is_host_in_room: True if the host is in the room. - state_before_join: The state before the join event (i.e. the resolution of - the states after its parent events). + is_partial_state_room: `True` if the server currently doesn't hold the full + state of the room. + is_host_in_room: `True` if the host is in the room. + partial_state_before_join: The state before the join event (i.e. the + resolution of the states after its parent events). May be full or + partial state, depending on `is_partial_state_room`. Returns: A tuple of: @@ -1109,6 +1135,23 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if not is_host_in_room: return True, remote_room_hosts + prev_member_event_id = partial_state_before_join.get( + (EventTypes.Member, user_id), None + ) + previous_membership = None + if prev_member_event_id: + prev_member_event = await self.store.get_event(prev_member_event_id) + previous_membership = prev_member_event.membership + + # If we are not fully joined yet, and the target is not already in the room, + # let's do a remote join so another server with the full state can validate + # that the user has not been banned for example. + # We could just accept the join and wait for state res to resolve that later on + # but we would then leak room history to this person until then, which is pretty + # bad. + if is_partial_state_room and previous_membership != Membership.JOIN: + return True, remote_room_hosts + # If the host is in the room, but not one of the authorised hosts # for restricted join rules, a remote join must be used. room_version = await self.store.get_room_version(room_id) @@ -1116,21 +1159,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # If restricted join rules are not being used, a local join can always # be used. if not await self.event_auth_handler.has_restricted_join_rules( - state_before_join, room_version + partial_state_before_join, room_version ): return False, [] # If the user is invited to the room or already joined, the join # event can always be issued locally. - prev_member_event_id = state_before_join.get((EventTypes.Member, user_id), None) - prev_member_event = None - if prev_member_event_id: - prev_member_event = await self.store.get_event(prev_member_event_id) - if prev_member_event.membership in ( - Membership.JOIN, - Membership.INVITE, - ): - return False, [] + if previous_membership in (Membership.JOIN, Membership.INVITE): + return False, [] + + # All the partial state cases are covered above. We have been given the full + # state of the room. + assert not is_partial_state_room + state_before_join = partial_state_before_join # If the local host has a user who can issue invites, then a local # join can be done. @@ -1154,7 +1195,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Ensure the member should be allowed access via membership in a room. await self.event_auth_handler.check_restricted_join_rules( - state_before_join, room_version, user_id, prev_member_event + state_before_join, room_version, user_id, previous_membership ) # If this is going to be a local join, additional information must @@ -1304,11 +1345,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if prev_member_event.membership == Membership.JOIN: await self._user_left_room(target_user, room_id) - async def _can_guest_join(self, current_state_ids: StateMap[str]) -> bool: + async def _can_guest_join(self, partial_current_state_ids: StateMap[str]) -> bool: """ Returns whether a guest can join a room based on its current state. + + Args: + partial_current_state_ids: The current state of the room. May be full or + partial state. """ - guest_access_id = current_state_ids.get((EventTypes.GuestAccess, ""), None) + guest_access_id = partial_current_state_ids.get( + (EventTypes.GuestAccess, ""), None + ) if not guest_access_id: return False @@ -1634,19 +1681,25 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) return event, stream_id - async def _is_host_in_room(self, current_state_ids: StateMap[str]) -> bool: + async def _is_host_in_room(self, partial_current_state_ids: StateMap[str]) -> bool: + """Returns whether the homeserver is in the room based on its current state. + + Args: + partial_current_state_ids: The current state of the room. May be full or + partial state. + """ # Have we just created the room, and is this about to be the very # first member event? - create_event_id = current_state_ids.get(("m.room.create", "")) - if len(current_state_ids) == 1 and create_event_id: + create_event_id = partial_current_state_ids.get(("m.room.create", "")) + if len(partial_current_state_ids) == 1 and create_event_id: # We can only get here if we're in the process of creating the room return True - for etype, state_key in current_state_ids: + for etype, state_key in partial_current_state_ids: if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue - event_id = current_state_ids[(etype, state_key)] + event_id = partial_current_state_ids[(etype, state_key)] event = await self.store.get_event(event_id, allow_none=True) if not event: continue @@ -1715,8 +1768,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ] if len(remote_room_hosts) == 0: - raise SynapseError( - 404, + raise NoKnownServersError( "Can't join remote room because no servers " "that are in the room have been provided.", ) @@ -1947,7 +1999,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ] if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") + raise NoKnownServersError() return await self.federation_handler.do_knock( remote_room_hosts, room_id, user.to_string(), content=content diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index 221552a2a6..ba261702d4 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -15,8 +15,7 @@ import logging from typing import TYPE_CHECKING, List, Optional, Tuple -from synapse.api.errors import SynapseError -from synapse.handlers.room_member import RoomMemberHandler +from synapse.handlers.room_member import NoKnownServersError, RoomMemberHandler from synapse.replication.http.membership import ( ReplicationRemoteJoinRestServlet as ReplRemoteJoin, ReplicationRemoteKnockRestServlet as ReplRemoteKnock, @@ -52,7 +51,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): ) -> Tuple[str, int]: """Implements RoomMemberHandler._remote_join""" if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") + raise NoKnownServersError() ret = await self._remote_join_client( requester=requester, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cb66376fb4..ffe766fd56 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -16,7 +16,6 @@ import itertools import logging from collections import OrderedDict -from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -36,7 +35,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -72,24 +71,6 @@ event_counter = Counter( ) -class PartialStateConflictError(SynapseError): - """An internal error raised when attempting to persist an event with partial state - after the room containing the event has been un-partial stated. - - This error should be handled by recomputing the event context and trying again. - - This error has an HTTP status code so that it can be transported over replication. - It should not be exposed to clients. - """ - - def __init__(self) -> None: - super().__init__( - HTTPStatus.CONFLICT, - msg="Cannot persist partial state event in un-partial stated room", - errcode=Codes.UNKNOWN, - ) - - @attr.s(slots=True, auto_attribs=True) class DeltaState: """Deltas to use to update the `current_state_events` table. diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 57675fa407..5868eb2da7 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -575,26 +575,6 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase): fed_client = fed_handler.federation_client room_id = "!room:example.com" - membership_event = make_event_from_dict( - { - "room_id": room_id, - "type": "m.room.member", - "sender": "@alice:test", - "state_key": "@alice:test", - "content": {"membership": "join"}, - }, - RoomVersions.V10, - ) - - mock_make_membership_event = Mock( - return_value=make_awaitable( - ( - "example.com", - membership_event, - RoomVersions.V10, - ) - ) - ) EVENT_CREATE = make_event_from_dict( { @@ -640,6 +620,26 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase): }, room_version=RoomVersions.V10, ) + membership_event = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.member", + "sender": "@alice:test", + "state_key": "@alice:test", + "content": {"membership": "join"}, + "prev_events": [EVENT_INVITATION_MEMBERSHIP.event_id], + }, + RoomVersions.V10, + ) + mock_make_membership_event = Mock( + return_value=make_awaitable( + ( + "example.com", + membership_event, + RoomVersions.V10, + ) + ) + ) mock_send_join = Mock( return_value=make_awaitable( SendJoinResult( -- cgit 1.5.1 From c10e13125057e506381d1be8c2ec1394eee45d62 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 13 Feb 2023 11:49:20 +0000 Subject: Apply logging from hotfixes branch to develop (#15054) * Apply logging from hotfixes branch to develop Part of #4826. Originally added in #11882. * Changelog --- changelog.d/15054.misc | 1 + synapse/rest/client/account.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/15054.misc (limited to 'synapse') diff --git a/changelog.d/15054.misc b/changelog.d/15054.misc new file mode 100644 index 0000000000..d800b107cf --- /dev/null +++ b/changelog.d/15054.misc @@ -0,0 +1 @@ +Merge debug logging from the hotfixes branch. diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 4373c73662..232f3a976d 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -415,6 +415,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): request, MsisdnRequestTokenBody ) msisdn = phone_number_to_msisdn(body.country, body.phone_number) + logger.info("Request #%s to verify ownership of %s", body.send_attempt, msisdn) if not await check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( @@ -444,6 +445,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): await self.hs.get_clock().sleep(random.randint(1, 10) / 10) return 200, {"sid": random_string(16)} + logger.info("MSISDN %s is already in use by %s", msisdn, existing_user_id) raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) if not self.hs.config.registration.account_threepid_delegate_msisdn: @@ -468,6 +470,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe( body.send_attempt ) + logger.info("MSISDN %s: got response from identity server: %s", msisdn, ret) return 200, ret -- cgit 1.5.1 From bdccfd24773d7482ae497263634312640dab01d1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 13 Feb 2023 12:12:48 +0000 Subject: Refactor arguments of `try_unbind_threepid(_with_id_server)` from dict to separate args (#15053) --- changelog.d/15053.misc | 1 + synapse/handlers/auth.py | 5 ++-- synapse/handlers/deactivate_account.py | 7 +---- synapse/handlers/identity.py | 47 +++++++++++++++++----------------- synapse/rest/client/account.py | 7 +---- 5 files changed, 28 insertions(+), 39 deletions(-) create mode 100644 changelog.d/15053.misc (limited to 'synapse') diff --git a/changelog.d/15053.misc b/changelog.d/15053.misc new file mode 100644 index 0000000000..c27528f5c6 --- /dev/null +++ b/changelog.d/15053.misc @@ -0,0 +1 @@ +Refactor arguments of `try_unbind_threepid` and `_try_unbind_threepid_with_id_server` to not use dictionaries. \ No newline at end of file diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 30f2d46c3c..57a6854b1e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1593,9 +1593,8 @@ class AuthHandler: if medium == "email": address = canonicalise_email(address) - identity_handler = self.hs.get_identity_handler() - result = await identity_handler.try_unbind_threepid( - user_id, {"medium": medium, "address": address, "id_server": id_server} + result = await self.hs.get_identity_handler().try_unbind_threepid( + user_id, medium, address, id_server ) await self.store.user_delete_threepid(user_id, medium, address) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index d74d135c0c..d24f649382 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -106,12 +106,7 @@ class DeactivateAccountHandler: for threepid in threepids: try: result = await self._identity_handler.try_unbind_threepid( - user_id, - { - "medium": threepid["medium"], - "address": threepid["address"], - "id_server": id_server, - }, + user_id, threepid["medium"], threepid["address"], id_server ) identity_server_supports_unbinding &= result except Exception: diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 848e46eb9b..bf0f7acf80 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -219,28 +219,31 @@ class IdentityHandler: data = json_decoder.decode(e.msg) # XXX WAT? return data - async def try_unbind_threepid(self, mxid: str, threepid: dict) -> bool: - """Attempt to remove a 3PID from an identity server, or if one is not provided, all - identity servers we're aware the binding is present on + async def try_unbind_threepid( + self, mxid: str, medium: str, address: str, id_server: Optional[str] + ) -> bool: + """Attempt to remove a 3PID from one or more identity servers. Args: mxid: Matrix user ID of binding to be removed - threepid: Dict with medium & address of binding to be - removed, and an optional id_server. + medium: The medium of the third-party ID. + address: The address of the third-party ID. + id_server: An identity server to attempt to unbind from. If None, + attempt to remove the association from all identity servers + known to potentially have it. Raises: - SynapseError: If we failed to contact the identity server + SynapseError: If we failed to contact one or more identity servers. Returns: - True on success, otherwise False if the identity - server doesn't support unbinding (or no identity server found to - contact). + True on success, otherwise False if the identity server doesn't + support unbinding (or no identity server to contact was found). """ - if threepid.get("id_server"): - id_servers = [threepid["id_server"]] + if id_server: + id_servers = [id_server] else: id_servers = await self.store.get_id_servers_user_bound( - user_id=mxid, medium=threepid["medium"], address=threepid["address"] + mxid, medium, address ) # We don't know where to unbind, so we don't have a choice but to return @@ -249,20 +252,21 @@ class IdentityHandler: changed = True for id_server in id_servers: - changed &= await self.try_unbind_threepid_with_id_server( - mxid, threepid, id_server + changed &= await self._try_unbind_threepid_with_id_server( + mxid, medium, address, id_server ) return changed - async def try_unbind_threepid_with_id_server( - self, mxid: str, threepid: dict, id_server: str + async def _try_unbind_threepid_with_id_server( + self, mxid: str, medium: str, address: str, id_server: str ) -> bool: """Removes a binding from an identity server Args: mxid: Matrix user ID of binding to be removed - threepid: Dict with medium & address of binding to be removed + medium: The medium of the third-party ID + address: The address of the third-party ID id_server: Identity server to unbind from Raises: @@ -286,7 +290,7 @@ class IdentityHandler: content = { "mxid": mxid, - "threepid": {"medium": threepid["medium"], "address": threepid["address"]}, + "threepid": {"medium": medium, "address": address}, } # we abuse the federation http client to sign the request, but we have to send it @@ -319,12 +323,7 @@ class IdentityHandler: except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") - await self.store.remove_user_bound_threepid( - user_id=mxid, - medium=threepid["medium"], - address=threepid["address"], - id_server=id_server, - ) + await self.store.remove_user_bound_threepid(mxid, medium, address, id_server) return changed diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 232f3a976d..662f5bf762 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -737,12 +737,7 @@ class ThreepidUnbindRestServlet(RestServlet): # Attempt to unbind the threepid from an identity server. If id_server is None, try to # unbind from all identity servers this threepid has been added to in the past result = await self.identity_handler.try_unbind_threepid( - requester.user.to_string(), - { - "address": body.address, - "medium": body.medium, - "id_server": body.id_server, - }, + requester.user.to_string(), body.medium, body.address, body.id_server ) return 200, {"id_server_unbind_result": "success" if result else "no-support"} -- cgit 1.5.1 From 3d7aead5d62e6da97e006199b3f957325e54b053 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 13 Feb 2023 16:30:58 +0000 Subject: Tweak comment on `_is_local_room_accessible` as part of room visibility in `/hierarchy` to clarify the condition for a room being visible. (#14834) --- changelog.d/14834.misc | 1 + synapse/handlers/room_summary.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14834.misc (limited to 'synapse') diff --git a/changelog.d/14834.misc b/changelog.d/14834.misc new file mode 100644 index 0000000000..e683212dc4 --- /dev/null +++ b/changelog.d/14834.misc @@ -0,0 +1 @@ +Tweak comment on `_is_local_room_accessible` as part of room visibility in `/hierarchy` to clarify the condition for a room being visible. \ No newline at end of file diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 4472019fbc..807245160d 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -521,8 +521,8 @@ class RoomSummaryHandler: It should return true if: - * The requester is joined or can join the room (per MSC3173). - * The origin server has any user that is joined or can join the room. + * The requesting user is joined or can join the room (per MSC3173); or + * The origin server has any user that is joined or can join the room; or * The history visibility is set to world readable. Args: -- cgit 1.5.1 From db2b105d69fa331bb3f050df82266314f61577ea Mon Sep 17 00:00:00 2001 From: Harishankar Kumar <31770598+hari01584@users.noreply.github.com> Date: Tue, 14 Feb 2023 15:07:08 +0530 Subject: Change collection[str] to StrCollection in event_auth code (#14929) Signed-off-by: Harishankar Kumar --- changelog.d/14929.misc | 1 + synapse/event_auth.py | 23 +++++++++------------- synapse/events/__init__.py | 6 +++--- synapse/storage/databases/main/events.py | 7 +++---- .../storage/databases/main/events_bg_updates.py | 6 +++--- 5 files changed, 19 insertions(+), 24 deletions(-) create mode 100644 changelog.d/14929.misc (limited to 'synapse') diff --git a/changelog.d/14929.misc b/changelog.d/14929.misc new file mode 100644 index 0000000000..2cc3614dfd --- /dev/null +++ b/changelog.d/14929.misc @@ -0,0 +1 @@ +Use `StrCollection` to avoid potential bugs with `Collection[str]`. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index e0be9f88cc..4d6d1b8ebd 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -16,18 +16,7 @@ import collections.abc import logging import typing -from typing import ( - Any, - Collection, - Dict, - Iterable, - List, - Mapping, - Optional, - Set, - Tuple, - Union, -) +from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes @@ -56,7 +45,13 @@ from synapse.api.room_versions import ( RoomVersions, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id +from synapse.types import ( + MutableStateMap, + StateMap, + StrCollection, + UserID, + get_domain_from_id, +) if typing.TYPE_CHECKING: # conditional imports to avoid import cycle @@ -69,7 +64,7 @@ logger = logging.getLogger(__name__) class _EventSourceStore(Protocol): async def get_events( self, - event_ids: Collection[str], + event_ids: StrCollection, redact_behaviour: EventRedactBehaviour, get_prev_content: bool = False, allow_rejected: bool = False, diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 8aca9a3ab9..91118a8d84 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -39,7 +39,7 @@ from unpaddedbase64 import encode_base64 from synapse.api.constants import RelationTypes from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions -from synapse.types import JsonDict, RoomStreamToken +from synapse.types import JsonDict, RoomStreamToken, StrCollection from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze from synapse.util.stringutils import strtobool @@ -413,7 +413,7 @@ class EventBase(metaclass=abc.ABCMeta): """ return [e for e, _ in self._dict["prev_events"]] - def auth_event_ids(self) -> Sequence[str]: + def auth_event_ids(self) -> StrCollection: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. @@ -558,7 +558,7 @@ class FrozenEventV2(EventBase): """ return self._dict["prev_events"] - def auth_event_ids(self) -> Sequence[str]: + def auth_event_ids(self) -> StrCollection: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ffe766fd56..7996cbb557 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -25,7 +25,6 @@ from typing import ( Iterable, List, Optional, - Sequence, Set, Tuple, ) @@ -51,7 +50,7 @@ from synapse.storage.databases.main.search import SearchEntry from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator -from synapse.types import JsonDict, StateMap, get_domain_from_id +from synapse.types import JsonDict, StateMap, StrCollection, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.stringutils import non_null_str_or_none @@ -552,7 +551,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, Sequence[str]], + event_to_auth_chain: Dict[str, StrCollection], ) -> None: """Calculate the chain cover index for the given events. @@ -846,7 +845,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, Sequence[str]], + event_to_auth_chain: Dict[str, StrCollection], events_to_calc_chain_id_for: Set[str], chain_map: Dict[str, Tuple[int, int]], ) -> Dict[str, Tuple[int, int]]: diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index b9d3c36d60..584536111d 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast import attr @@ -29,7 +29,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.events import PersistEventsStore from synapse.storage.types import Cursor -from synapse.types import JsonDict +from synapse.types import JsonDict, StrCollection if TYPE_CHECKING: from synapse.server import HomeServer @@ -1061,7 +1061,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): self.event_chain_id_gen, # type: ignore[attr-defined] event_to_room_id, event_to_types, - cast(Dict[str, Sequence[str]], event_to_auth_chain), + cast(Dict[str, StrCollection], event_to_auth_chain), ) return _CalculateChainCover( -- cgit 1.5.1 From f09db5c9918b6aaeb1f53ab4fac3a7f05f512c5f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Feb 2023 12:10:29 +0100 Subject: Skip calculating unread push actions in `/sync` when `enable_push` is false. (#14980) --- changelog.d/14980.misc | 1 + synapse/handlers/sync.py | 8 ++++++++ synapse/storage/databases/main/event_push_actions.py | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 changelog.d/14980.misc (limited to 'synapse') diff --git a/changelog.d/14980.misc b/changelog.d/14980.misc new file mode 100644 index 0000000000..145f4a788b --- /dev/null +++ b/changelog.d/14980.misc @@ -0,0 +1 @@ +Skip calculating unread push actions in /sync when enable_push is false. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bae46158a..3a9cddf15a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -269,6 +269,8 @@ class SyncHandler: self._state_storage_controller = self._storage_controllers.state self._device_handler = hs.get_device_handler() + self.should_calculate_push_rules = hs.config.push.enable_push + # TODO: flush cache entries on subsequent sync request. # Once we get the next /sync request (ie, one with the same access token # that sets 'since' to 'next_batch'), we know that device won't need a @@ -1288,6 +1290,12 @@ class SyncHandler: async def unread_notifs_for_room_id( self, room_id: str, sync_config: SyncConfig ) -> RoomNotifCounts: + if not self.should_calculate_push_rules: + # If push rules have been universally disabled then we know we won't + # have any unread counts in the DB, so we may as well skip asking + # the DB. + return RoomNotifCounts.empty() + with Measure(self.clock, "unread_notifs_for_room_id"): return await self.store.get_unread_event_push_actions_by_room_for_user( diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3a0c370fde..eeccf5db24 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -203,11 +203,18 @@ class RoomNotifCounts: # Map of thread ID to the notification counts. threads: Dict[str, NotifCounts] + @staticmethod + def empty() -> "RoomNotifCounts": + return _EMPTY_ROOM_NOTIF_COUNTS + def __len__(self) -> int: # To properly account for the amount of space in any caches. return len(self.threads) + 1 +_EMPTY_ROOM_NOTIF_COUNTS = RoomNotifCounts(NotifCounts(), {}) + + def _serialize_action( actions: Collection[Union[Mapping, str]], is_highlight: bool ) -> str: -- cgit 1.5.1 From cb262713b701d1abcbca03334d17e2d0f81eee4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Feb 2023 12:20:25 +0100 Subject: Fix clashing DB txn name (#15070) * Fix clashing DB txn name * Newsfile --- changelog.d/15070.misc | 1 + synapse/storage/databases/main/end_to_end_keys.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15070.misc (limited to 'synapse') diff --git a/changelog.d/15070.misc b/changelog.d/15070.misc new file mode 100644 index 0000000000..0f3244de9f --- /dev/null +++ b/changelog.d/15070.misc @@ -0,0 +1 @@ +Fix clashing database transaction name. diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 752dc16e17..2c2d145666 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -262,7 +262,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker for batch in batch_iter(signature_query, 50): cross_sigs_result = await self.db_pool.runInteraction( - "get_e2e_cross_signing_signatures", + "get_e2e_cross_signing_signatures_for_devices", self._get_e2e_cross_signing_signatures_for_devices_txn, batch, ) -- cgit 1.5.1 From 463c19ac3648b242c480e299349d2ef90bf38a0b Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:32:19 +0000 Subject: Faster joins: Omit device list updates from partial state rooms in /sync (#15069) ...when lazy loading of members is not enabled. It's weird to notify a client that another user's device list has changed when the client doesn't think that they share a room. Note that when a room is un-partial stated, device list updates are emitted for every member in that room over /sync. Signed-off-by: Sean Quah --- changelog.d/15069.misc | 1 + synapse/handlers/sync.py | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/15069.misc (limited to 'synapse') diff --git a/changelog.d/15069.misc b/changelog.d/15069.misc new file mode 100644 index 0000000000..e7a619ad2b --- /dev/null +++ b/changelog.d/15069.misc @@ -0,0 +1 @@ +Faster joins: omit device list updates originating from partial state rooms in /sync responses without lazy loading of members enabled. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3a9cddf15a..4e4595312c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1399,6 +1399,11 @@ class SyncHandler: for room_id, is_partial_state in results.items() if is_partial_state ) + membership_change_events = [ + event + for event in membership_change_events + if not results.get(event.room_id, False) + ] # Incremental eager syncs should additionally include rooms that # - we are joined to -- cgit 1.5.1 From e9b1ff9f31f8ff093e7eaf9c54fa8f40a3b66aa8 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 14 Feb 2023 15:50:59 +0000 Subject: Prevent clients from reporting nonexistent events. (#13779) --- changelog.d/13779.bugfix | 1 + synapse/rest/client/report_event.py | 11 ++++++++++- tests/rest/client/test_report_event.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13779.bugfix (limited to 'synapse') diff --git a/changelog.d/13779.bugfix b/changelog.d/13779.bugfix new file mode 100644 index 0000000000..a92c722c6e --- /dev/null +++ b/changelog.d/13779.bugfix @@ -0,0 +1 @@ +Prevent clients from reporting nonexistent events. \ No newline at end of file diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py index e2b410cf32..9be5860221 100644 --- a/synapse/rest/client/report_event.py +++ b/synapse/rest/client/report_event.py @@ -16,7 +16,7 @@ import logging from http import HTTPStatus from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -39,6 +39,7 @@ class ReportEventRestServlet(RestServlet): self.auth = hs.get_auth() self.clock = hs.get_clock() self.store = hs.get_datastores().main + self._event_handler = self.hs.get_event_handler() async def on_POST( self, request: SynapseRequest, room_id: str, event_id: str @@ -61,6 +62,14 @@ class ReportEventRestServlet(RestServlet): Codes.BAD_JSON, ) + event = await self._event_handler.get_event( + requester.user, room_id, event_id, show_redacted=False + ) + if event is None: + raise NotFoundError( + "Unable to report event: it does not exist or you aren't able to see it." + ) + await self.store.add_event_report( room_id=room_id, event_id=event_id, diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py index 7cb1017a4a..1250685d39 100644 --- a/tests/rest/client/test_report_event.py +++ b/tests/rest/client/test_report_event.py @@ -73,6 +73,18 @@ class ReportEventTestCase(unittest.HomeserverTestCase): data = {"reason": None, "score": None} self._assert_status(400, data) + def test_cannot_report_nonexistent_event(self) -> None: + """ + Tests that we don't accept event reports for events which do not exist. + """ + channel = self.make_request( + "POST", + f"rooms/{self.room_id}/report/$nonsenseeventid:test", + {"reason": "i am very sad"}, + access_token=self.other_user_tok, + ) + self.assertEqual(404, channel.code, msg=channel.result["body"]) + def _assert_status(self, response_status: int, data: JsonDict) -> None: channel = self.make_request( "POST", self.report_path, data, access_token=self.other_user_tok -- cgit 1.5.1 From 119e0795a58548fb38fab299e7c362fcbb388d68 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 14 Feb 2023 14:02:19 -0500 Subject: Implement MSC3966: Add a push rule condition to search for a value in an array. (#15045) The `exact_event_property_contains` condition can be used to search for a value inside of an array. --- changelog.d/15045.feature | 1 + rust/benches/evaluator.rs | 32 +++++++++------- rust/src/push/evaluator.rs | 65 +++++++++++++++++++++++++------- rust/src/push/mod.rs | 33 +++++++++++++++- stubs/synapse/synapse_rust/push.pyi | 7 ++-- synapse/config/experimental.py | 5 +++ synapse/push/bulk_push_rule_evaluator.py | 21 +++++++---- synapse/types/__init__.py | 1 + tests/push/test_push_rule_evaluator.py | 53 ++++++++++++++++++++++++-- 9 files changed, 176 insertions(+), 42 deletions(-) create mode 100644 changelog.d/15045.feature (limited to 'synapse') diff --git a/changelog.d/15045.feature b/changelog.d/15045.feature new file mode 100644 index 0000000000..87766befda --- /dev/null +++ b/changelog.d/15045.feature @@ -0,0 +1 @@ +Experimental support for [MSC3966](https://github.com/matrix-org/matrix-spec-proposals/pull/3966): the `exact_event_property_contains` push rule condition. diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 229553ebf8..8213dfd9ea 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -15,8 +15,8 @@ #![feature(test)] use std::collections::BTreeSet; use synapse::push::{ - evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules, - SimpleJsonValue, + evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, JsonValue, + PushRules, SimpleJsonValue, }; use test::Bencher; @@ -27,15 +27,15 @@ fn bench_match_exact(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -54,6 +54,7 @@ fn bench_match_exact(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -76,15 +77,15 @@ fn bench_match_word(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -103,6 +104,7 @@ fn bench_match_word(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -125,15 +127,15 @@ fn bench_match_word_miss(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -152,6 +154,7 @@ fn bench_match_word_miss(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -174,15 +177,15 @@ fn bench_eval_message(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -201,6 +204,7 @@ fn bench_eval_message(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index dd6b4343ec..2eaa06ad76 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -14,6 +14,7 @@ use std::collections::{BTreeMap, BTreeSet}; +use crate::push::JsonValue; use anyhow::{Context, Error}; use lazy_static::lazy_static; use log::warn; @@ -63,7 +64,7 @@ impl RoomVersionFeatures { pub struct PushRuleEvaluator { /// A mapping of "flattened" keys to simple JSON values in the event, e.g. /// includes things like "type" and "content.msgtype". - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, /// The "content.body", if any. body: String, @@ -87,7 +88,7 @@ pub struct PushRuleEvaluator { /// The related events, indexed by relation type. Flattened in the same manner as /// `flattened_keys`. - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, /// If msc3664, push rules for related events, is enabled. related_event_match_enabled: bool, @@ -101,6 +102,9 @@ pub struct PushRuleEvaluator { /// If MSC3758 (exact_event_match push rule condition) is enabled. msc3758_exact_event_match: bool, + + /// If MSC3966 (exact_event_property_contains push rule condition) is enabled. + msc3966_exact_event_property_contains: bool, } #[pymethods] @@ -109,21 +113,22 @@ impl PushRuleEvaluator { #[allow(clippy::too_many_arguments)] #[new] pub fn py_new( - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, has_mentions: bool, user_mentions: BTreeSet, room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, related_event_match_enabled: bool, room_version_feature_flags: Vec, msc3931_enabled: bool, msc3758_exact_event_match: bool, + msc3966_exact_event_property_contains: bool, ) -> Result { let body = match flattened_keys.get("content.body") { - Some(SimpleJsonValue::Str(s)) => s.clone(), + Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone(), _ => String::new(), }; @@ -141,6 +146,7 @@ impl PushRuleEvaluator { room_version_feature_flags, msc3931_enabled, msc3758_exact_event_match, + msc3966_exact_event_property_contains, }) } @@ -263,6 +269,9 @@ impl PushRuleEvaluator { KnownCondition::RelatedEventMatch(event_match) => { self.match_related_event_match(event_match, user_id)? } + KnownCondition::ExactEventPropertyContains(exact_event_match) => { + self.match_exact_event_property_contains(exact_event_match)? + } KnownCondition::IsUserMention => { if let Some(uid) = user_id { self.user_mentions.contains(uid) @@ -345,7 +354,7 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(SimpleJsonValue::Str(haystack)) = + let haystack = if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = self.flattened_keys.get(&*event_match.key) { haystack @@ -377,7 +386,9 @@ impl PushRuleEvaluator { let value = &exact_event_match.value; - let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) { + let haystack = if let Some(JsonValue::Value(haystack)) = + self.flattened_keys.get(&*exact_event_match.key) + { haystack } else { return Ok(false); @@ -441,11 +452,12 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) { - haystack - } else { - return Ok(false); - }; + let haystack = + if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = event.get(&**key) { + haystack + } else { + return Ok(false); + }; // For the content.body we match against "words", but for everything // else we match against the entire value. @@ -459,6 +471,29 @@ impl PushRuleEvaluator { compiled_pattern.is_match(haystack) } + /// Evaluates a `exact_event_property_contains` condition. (MSC3758) + fn match_exact_event_property_contains( + &self, + exact_event_match: &ExactEventMatchCondition, + ) -> Result { + // First check if the feature is enabled. + if !self.msc3966_exact_event_property_contains { + return Ok(false); + } + + let value = &exact_event_match.value; + + let haystack = if let Some(JsonValue::Array(haystack)) = + self.flattened_keys.get(&*exact_event_match.key) + { + haystack + } else { + return Ok(false); + }; + + Ok(haystack.contains(&**value)) + } + /// Match the member count against an 'is' condition /// The `is` condition can be things like '>2', '==3' or even just '4'. fn match_member_count(&self, is: &str) -> Result { @@ -488,7 +523,7 @@ fn push_rule_evaluator() { let mut flattened_keys = BTreeMap::new(); flattened_keys.insert( "content.body".to_string(), - SimpleJsonValue::Str("foo bar bob hello".to_string()), + JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())), ); let evaluator = PushRuleEvaluator::py_new( flattened_keys, @@ -503,6 +538,7 @@ fn push_rule_evaluator() { vec![], true, true, + true, ) .unwrap(); @@ -519,7 +555,7 @@ fn test_requires_room_version_supports_condition() { let mut flattened_keys = BTreeMap::new(); flattened_keys.insert( "content.body".to_string(), - SimpleJsonValue::Str("foo bar bob hello".to_string()), + JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())), ); let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( @@ -535,6 +571,7 @@ fn test_requires_room_version_supports_condition() { flags, true, true, + true, ) .unwrap(); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 79e519fe11..253b5f367c 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -58,7 +58,7 @@ use anyhow::{Context, Error}; use log::warn; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; -use pyo3::types::{PyBool, PyLong, PyString}; +use pyo3::types::{PyBool, PyList, PyLong, PyString}; use pythonize::{depythonize, pythonize}; use serde::de::Error as _; use serde::{Deserialize, Serialize}; @@ -280,6 +280,35 @@ impl<'source> FromPyObject<'source> for SimpleJsonValue { } } +/// A JSON values (list, string, int, boolean, or null). +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum JsonValue { + Array(Vec), + Value(SimpleJsonValue), +} + +impl<'source> FromPyObject<'source> for JsonValue { + fn extract(ob: &'source PyAny) -> PyResult { + if let Ok(l) = ::try_from(ob) { + match l.iter().map(SimpleJsonValue::extract).collect() { + Ok(a) => Ok(JsonValue::Array(a)), + Err(e) => Err(PyTypeError::new_err(format!( + "Can't convert to JsonValue::Array: {}", + e + ))), + } + } else if let Ok(v) = SimpleJsonValue::extract(ob) { + Ok(JsonValue::Value(v)) + } else { + Err(PyTypeError::new_err(format!( + "Can't convert from {} to JsonValue", + ob.get_type().name()? + ))) + } + } +} + /// A condition used in push rules to match against an event. /// /// We need this split as `serde` doesn't give us the ability to have a @@ -303,6 +332,8 @@ pub enum KnownCondition { ExactEventMatch(ExactEventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), + #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")] + ExactEventPropertyContains(ExactEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] IsUserMention, #[serde(rename = "org.matrix.msc3952.is_room_mention")] diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 328f681a29..7b33c30cc9 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -14,7 +14,7 @@ from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union -from synapse.types import JsonDict, SimpleJsonValue +from synapse.types import JsonDict, JsonValue class PushRule: @property @@ -56,18 +56,19 @@ def get_base_rule_ids() -> Collection[str]: ... class PushRuleEvaluator: def __init__( self, - flattened_keys: Mapping[str, SimpleJsonValue], + flattened_keys: Mapping[str, JsonValue], has_mentions: bool, user_mentions: Set[str], room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], - related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]], + related_events_flattened: Mapping[str, Mapping[str, JsonValue]], related_event_match_enabled: bool, room_version_feature_flags: Tuple[str, ...], msc3931_enabled: bool, msc3758_exact_event_match: bool, + msc3966_exact_event_property_contains: bool, ): ... def run( self, diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 6ac2f0c10d..1d294f8798 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -188,3 +188,8 @@ class ExperimentalConfig(Config): self.msc3958_supress_edit_notifs = experimental.get( "msc3958_supress_edit_notifs", False ) + + # MSC3966: exact_event_property_contains push rule condition. + self.msc3966_exact_event_property_contains = experimental.get( + "msc3966_exact_event_property_contains", False + ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f6a5bffb0f..2e917c90c4 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -44,7 +44,7 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator -from synapse.types import SimpleJsonValue +from synapse.types import JsonValue from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func @@ -259,13 +259,13 @@ class BulkPushRuleEvaluator: async def _related_events( self, event: EventBase - ) -> Dict[str, Dict[str, SimpleJsonValue]]: + ) -> Dict[str, Dict[str, JsonValue]]: """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation Returns: Mapping of relation type to flattened events. """ - related_events: Dict[str, Dict[str, SimpleJsonValue]] = {} + related_events: Dict[str, Dict[str, JsonValue]] = {} if self._related_event_match_enabled: related_event_id = event.content.get("m.relates_to", {}).get("event_id") relation_type = event.content.get("m.relates_to", {}).get("rel_type") @@ -429,6 +429,7 @@ class BulkPushRuleEvaluator: event.room_version.msc3931_push_features, self.hs.config.experimental.msc1767_enabled, # MSC3931 flag self.hs.config.experimental.msc3758_exact_event_match, + self.hs.config.experimental.msc3966_exact_event_property_contains, ) users = rules_by_user.keys() @@ -502,18 +503,22 @@ RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] +def _is_simple_value(value: Any) -> bool: + return isinstance(value, (bool, str)) or type(value) is int or value is None + + def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, - result: Optional[Dict[str, SimpleJsonValue]] = None, + result: Optional[Dict[str, JsonValue]] = None, *, msc3783_escape_event_match_key: bool = False, -) -> Dict[str, SimpleJsonValue]: +) -> Dict[str, JsonValue]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, flatten it into a single layer dictionary by combining the keys & sub-keys. - String, integer, boolean, and null values are kept. All others are dropped. + String, integer, boolean, null or lists of those values are kept. All others are dropped. Transforms: @@ -542,8 +547,10 @@ def _flatten_dict( # nested fields. key = key.replace("\\", "\\\\").replace(".", "\\.") - if isinstance(value, (bool, str)) or type(value) is int or value is None: + if _is_simple_value(value): result[".".join(prefix + [key])] = value + elif isinstance(value, (list, tuple)): + result[".".join(prefix + [key])] = [v for v in value if _is_simple_value(v)] elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below _flatten_dict( diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 52e366c8ae..33363867c4 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -71,6 +71,7 @@ MutableStateMap = MutableMapping[StateKey, T] # JSON types. These could be made stronger, but will do for now. # A "simple" (canonical) JSON value. SimpleJsonValue = Optional[Union[str, int, bool]] +JsonValue = Union[List[SimpleJsonValue], Tuple[SimpleJsonValue, ...], SimpleJsonValue] # A JSON-serialisable dict. JsonDict = Dict[str, Any] # A JSON-serialisable mapping; roughly speaking an immutable JSONDict. diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 6603447341..0554d247bc 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -32,6 +32,7 @@ from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.synapse_rust.push import PushRuleEvaluator from synapse.types import JsonDict, JsonMapping, UserID from synapse.util import Clock +from synapse.util.frozenutils import freeze from tests import unittest from tests.test_utils.event_injection import create_event, inject_member_event @@ -57,17 +58,24 @@ class FlattenDictTestCase(unittest.TestCase): ) def test_non_string(self) -> None: - """Booleans, ints, and nulls should be kept while other items are dropped.""" + """String, booleans, ints, nulls and list of those should be kept while other items are dropped.""" input: Dict[str, Any] = { "woo": "woo", "foo": True, "bar": 1, "baz": None, - "fuzz": [], + "fuzz": ["woo", True, 1, None, [], {}], "boo": {}, } self.assertEqual( - {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input) + { + "woo": "woo", + "foo": True, + "bar": 1, + "baz": None, + "fuzz": ["woo", True, 1, None], + }, + _flatten_dict(input), ) def test_event(self) -> None: @@ -117,6 +125,7 @@ class FlattenDictTestCase(unittest.TestCase): "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", + "content.org.matrix.msc1767.markup": [], } self.assertEqual(expected, _flatten_dict(event)) @@ -128,6 +137,7 @@ class FlattenDictTestCase(unittest.TestCase): "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", + "content.org.matrix.msc1767.markup": [], } self.assertEqual(expected, _flatten_dict(event)) @@ -169,6 +179,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): room_version_feature_flags=event.room_version.msc3931_push_features, msc3931_enabled=True, msc3758_exact_event_match=True, + msc3966_exact_event_property_contains=True, ) def test_display_name(self) -> None: @@ -549,6 +560,42 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "incorrect types should not match", ) + def test_exact_event_property_contains(self) -> None: + """Check that exact_event_property_contains conditions work as expected.""" + + condition = { + "kind": "org.matrix.msc3966.exact_event_property_contains", + "key": "content.value", + "value": "foobaz", + } + self._assert_matches( + condition, + {"value": ["foobaz"]}, + "exact value should match", + ) + self._assert_matches( + condition, + {"value": ["foobaz", "bugz"]}, + "extra values should match", + ) + self._assert_not_matches( + condition, + {"value": ["FoobaZ"]}, + "values should match and be case-sensitive", + ) + self._assert_not_matches( + condition, + {"value": "foobaz"}, + "does not search in a string", + ) + + # it should work on frozendicts too + self._assert_matches( + condition, + freeze({"value": ["foobaz"]}), + "values should match on frozendicts", + ) + def test_no_body(self) -> None: """Not having a body shouldn't break the evaluator.""" evaluator = self._get_evaluator({}) -- cgit 1.5.1 From 06ba71083eefbe1fd9a8eeed10e541dd7b52796f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Feb 2023 23:42:29 +0000 Subject: Fix order of partial state tables when purging (#15068) * Fix order of partial state tables when purging `partial_state_rooms` has an FK on `events` pointing to the join event we get from `/send_join`, so we must delete from that table before deleting from `events`. **NB:** It would be nice to cancel any resync processes for the room being purged. We do not do this at present. To do so reliably we'd need an internal HTTP "replication" endpoint, because the worker doing the resync process may be different to that handling the purge request. The first time the resync process tries to write data after the deletion it will fail because we have deleted necessary data e.g. auth events. AFAICS it will not retry the resync, so the only downside to not cancelling the resync is a scary-looking traceback. (This is presumably extremely race-sensitive.) * Changelog * admist(?) -> between * Warn about a race * Fix typo, thanks Sean Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --------- Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/15068.bugfix | 1 + synapse/handlers/federation.py | 5 +++++ synapse/storage/databases/main/purge_events.py | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 changelog.d/15068.bugfix (limited to 'synapse') diff --git a/changelog.d/15068.bugfix b/changelog.d/15068.bugfix new file mode 100644 index 0000000000..f09ffa2877 --- /dev/null +++ b/changelog.d/15068.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.76.0 where partially-joined rooms could not be deleted using the [purge room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api). diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 08727e4857..1d0f6bcd6f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1880,6 +1880,11 @@ class FederationHandler: logger.info("Updating current state for %s", room_id) # TODO(faster_joins): notify workers in notify_room_un_partial_stated # https://github.com/matrix-org/synapse/issues/12994 + # + # NB: there's a potential race here. If room is purged just before we + # call this, we _might_ end up inserting rows into current_state_events. + # (The logic is hard to chase through.) We think this is fine, but if + # not the HS admin should purge the room again. await self.state_handler.update_current_state(room_id) logger.info("Handling any pending device list updates") diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 9213ce0b5a..9c41d01e13 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -420,12 +420,14 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_push_actions", "event_search", "event_failed_pull_attempts", + # Note: the partial state tables have foreign keys between each other, and to + # `events` and `rooms`. We need to delete from them in the right order. "partial_state_events", + "partial_state_rooms_servers", + "partial_state_rooms", "events", "federation_inbound_events_staging", "local_current_membership", - "partial_state_rooms_servers", - "partial_state_rooms", "receipts_graph", "receipts_linearized", "room_aliases", -- cgit 1.5.1 From 5febf88b6c5194582f427142dc0850625547c0d9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 15 Feb 2023 11:47:57 +0000 Subject: Update the error code for duplicate annotation (#15075) --- changelog.d/15075.feature | 2 ++ synapse/api/errors.py | 4 ++++ synapse/handlers/message.py | 6 +++++- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15075.feature (limited to 'synapse') diff --git a/changelog.d/15075.feature b/changelog.d/15075.feature new file mode 100644 index 0000000000..d25a7567a4 --- /dev/null +++ b/changelog.d/15075.feature @@ -0,0 +1,2 @@ +Update the error code returned when user sends a duplicate annotation. + diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9235ce6536..e1737de59b 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -108,6 +108,10 @@ class Codes(str, Enum): USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL" + # Attempt to send a second annotation with the same event type & annotation key + # MSC2677 + DUPLICATE_ANNOTATION = "M_DUPLICATE_ANNOTATION" + class CodeMessageException(RuntimeError): """An exception with integer code and message string attributes. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f5b658d9d..aa90d0000d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1337,7 +1337,11 @@ class EventCreationHandler: relation.parent_id, event.type, aggregation_key, event.sender ) if already_exists: - raise SynapseError(400, "Can't send same reaction twice") + raise SynapseError( + 400, + "Can't send same reaction twice", + errcode=Codes.DUPLICATE_ANNOTATION, + ) # Don't attempt to start a thread if the parent event is a relation. elif relation.rel_type == RelationTypes.THREAD: -- cgit 1.5.1 From 27a3a72a50cb24f25e48fad1e6e79aba2cd1bea2 Mon Sep 17 00:00:00 2001 From: 999lakhisidhu <42063995+999lakhisidhu@users.noreply.github.com> Date: Wed, 15 Feb 2023 16:39:31 +0400 Subject: Support for selecting the Redis logical database. (#15034) Note that this is only used for key-value store (cached values) and not for the pub/sub replication used by Synapse. --- changelog.d/15034.feature | 1 + contrib/docker_compose_workers/README.md | 1 + docs/usage/configuration/config_documentation.md | 4 ++++ synapse/config/redis.py | 1 + synapse/server.py | 1 + 5 files changed, 8 insertions(+) create mode 100644 changelog.d/15034.feature (limited to 'synapse') diff --git a/changelog.d/15034.feature b/changelog.d/15034.feature new file mode 100644 index 0000000000..34f320da92 --- /dev/null +++ b/changelog.d/15034.feature @@ -0,0 +1 @@ +Allow Synapse to use a specific Redis [logical database](https://redis.io/commands/select/) in worker-mode deployments. diff --git a/contrib/docker_compose_workers/README.md b/contrib/docker_compose_workers/README.md index bdd3dd32e0..d3cdfe5614 100644 --- a/contrib/docker_compose_workers/README.md +++ b/contrib/docker_compose_workers/README.md @@ -68,6 +68,7 @@ redis: enabled: true host: redis port: 6379 + # dbid: # password: ``` diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 2883f76a26..75483bfb12 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3927,6 +3927,9 @@ This setting has the following sub-options: * `host` and `port`: Optional host and port to use to connect to redis. Defaults to localhost and 6379 * `password`: Optional password if configured on the Redis instance. +* `dbid`: Optional redis dbid if needs to connect to specific redis logical db. + + _Added in Synapse 1.78.0._ Example configuration: ```yaml @@ -3935,6 +3938,7 @@ redis: host: localhost port: 6379 password: + dbid: ``` --- ## Individual worker configuration diff --git a/synapse/config/redis.py b/synapse/config/redis.py index b42dd2e93a..e6a75be434 100644 --- a/synapse/config/redis.py +++ b/synapse/config/redis.py @@ -33,4 +33,5 @@ class RedisConfig(Config): self.redis_host = redis_config.get("host", "localhost") self.redis_port = redis_config.get("port", 6379) + self.redis_dbid = redis_config.get("dbid", None) self.redis_password = redis_config.get("password") diff --git a/synapse/server.py b/synapse/server.py index efc6b5f895..e5a3475247 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -827,6 +827,7 @@ class HomeServer(metaclass=abc.ABCMeta): hs=self, host=self.config.redis.redis_host, port=self.config.redis.redis_port, + dbid=self.config.redis.redis_dbid, password=self.config.redis.redis_password, reconnect=True, ) -- cgit 1.5.1 From 3ad817bfe561e0b7ddcd8398a76a4a4d3d789138 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 15 Feb 2023 13:59:06 +0000 Subject: Fix federated joins when the first server in the list is not in the room (#15074) Previously we would give up upon receiving a 404 from the first server, instead of trying the rest of the servers in the list. Signed-off-by: Sean Quah --- changelog.d/15074.bugfix | 1 + synapse/federation/federation_client.py | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 changelog.d/15074.bugfix (limited to 'synapse') diff --git a/changelog.d/15074.bugfix b/changelog.d/15074.bugfix new file mode 100644 index 0000000000..d1ceb4f4c8 --- /dev/null +++ b/changelog.d/15074.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where federated joins would fail if the first server in the list of servers to try is not in the room. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 0ac85a3be7..7d04560dca 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -884,7 +884,7 @@ class FederationClient(FederationBase): if 500 <= e.code < 600: failover = True - elif e.code == 400 and synapse_error.errcode in failover_errcodes: + elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes: failover = True elif failover_on_unknown_endpoint and self._is_unknown_endpoint( @@ -999,14 +999,13 @@ class FederationClient(FederationBase): return destination, ev, room_version + failover_errcodes = {Codes.NOT_FOUND} # MSC3083 defines additional error codes for room joins. Unfortunately # we do not yet know the room version, assume these will only be returned # by valid room versions. - failover_errcodes = ( - (Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN) - if membership == Membership.JOIN - else None - ) + if membership == Membership.JOIN: + failover_errcodes.add(Codes.UNABLE_AUTHORISE_JOIN) + failover_errcodes.add(Codes.UNABLE_TO_GRANT_JOIN) return await self._try_destination_list( "make_" + membership, -- cgit 1.5.1 From 979f237b282cbdaab8d74cc4c7473117093d63d9 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 16 Feb 2023 09:51:22 -0500 Subject: Update intentional mentions (MSC3952) to depend on `exact_event_match` (MSC3758). (#15037) This replaces the specific `is_room_mention` push rule condition used in MSC3952 with the generic `exact_event_match` push rule condition from MSC3758. No functionality changes due to this. --- changelog.d/15037.misc | 1 + rust/benches/evaluator.rs | 4 ---- rust/src/push/base_rules.rs | 7 +++++-- rust/src/push/evaluator.rs | 7 ------- rust/src/push/mod.rs | 13 ------------- stubs/synapse/synapse_rust/push.pyi | 1 - synapse/config/experimental.py | 7 ++++--- synapse/push/bulk_push_rule_evaluator.py | 4 ---- tests/push/test_bulk_push_rule_evaluator.py | 18 ++++++++++++++++-- tests/push/test_push_rule_evaluator.py | 23 ----------------------- 10 files changed, 26 insertions(+), 59 deletions(-) create mode 100644 changelog.d/15037.misc (limited to 'synapse') diff --git a/changelog.d/15037.misc b/changelog.d/15037.misc new file mode 100644 index 0000000000..fabfe77d35 --- /dev/null +++ b/changelog.d/15037.misc @@ -0,0 +1 @@ +Update [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952) support based on changes to the MSC. diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 8213dfd9ea..efd19a2165 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -45,7 +45,6 @@ fn bench_match_exact(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -95,7 +94,6 @@ fn bench_match_word(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -145,7 +143,6 @@ fn bench_match_word_miss(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -195,7 +192,6 @@ fn bench_eval_message(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index dcbca340fe..4a62b9696f 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -21,13 +21,13 @@ use lazy_static::lazy_static; use serde_json::Value; use super::KnownCondition; -use crate::push::Action; use crate::push::Condition; use crate::push::EventMatchCondition; use crate::push::PushRule; use crate::push::RelatedEventMatchCondition; use crate::push::SetTweak; use crate::push::TweakValue; +use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue}; const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak { set_tweak: Cow::Borrowed("highlight"), @@ -168,7 +168,10 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mention"), priority_class: 5, conditions: Cow::Borrowed(&[ - Condition::Known(KnownCondition::IsRoomMention), + Condition::Known(KnownCondition::ExactEventMatch(ExactEventMatchCondition { + key: Cow::Borrowed("content.org.matrix.msc3952.mentions.room"), + value: Cow::Borrowed(&SimpleJsonValue::Bool(true)), + })), Condition::Known(KnownCondition::SenderNotificationPermission { key: Cow::Borrowed("room"), }), diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 2eaa06ad76..55551ecb56 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -73,8 +73,6 @@ pub struct PushRuleEvaluator { has_mentions: bool, /// The user mentions that were part of the message. user_mentions: BTreeSet, - /// True if the message is a room message. - room_mention: bool, /// The number of users in the room. room_member_count: u64, @@ -116,7 +114,6 @@ impl PushRuleEvaluator { flattened_keys: BTreeMap, has_mentions: bool, user_mentions: BTreeSet, - room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, @@ -137,7 +134,6 @@ impl PushRuleEvaluator { body, has_mentions, user_mentions, - room_mention, room_member_count, notification_power_levels, sender_power_level, @@ -279,7 +275,6 @@ impl PushRuleEvaluator { false } } - KnownCondition::IsRoomMention => self.room_mention, KnownCondition::ContainsDisplayName => { if let Some(dn) = display_name { if !dn.is_empty() { @@ -529,7 +524,6 @@ fn push_rule_evaluator() { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), BTreeMap::new(), @@ -562,7 +556,6 @@ fn test_requires_room_version_supports_condition() { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), BTreeMap::new(), diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 253b5f367c..fdd2b2c143 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -336,8 +336,6 @@ pub enum KnownCondition { ExactEventPropertyContains(ExactEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] IsUserMention, - #[serde(rename = "org.matrix.msc3952.is_room_mention")] - IsRoomMention, ContainsDisplayName, RoomMemberCount { #[serde(skip_serializing_if = "Option::is_none")] @@ -667,17 +665,6 @@ fn test_deserialize_unstable_msc3952_user_condition() { )); } -#[test] -fn test_deserialize_unstable_msc3952_room_condition() { - let json = r#"{"kind":"org.matrix.msc3952.is_room_mention"}"#; - - let condition: Condition = serde_json::from_str(json).unwrap(); - assert!(matches!( - condition, - Condition::Known(KnownCondition::IsRoomMention) - )); -} - #[test] fn test_deserialize_custom_condition() { let json = r#"{"kind":"custom_tag"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 7b33c30cc9..a8f0ed2435 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -59,7 +59,6 @@ class PushRuleEvaluator: flattened_keys: Mapping[str, JsonValue], has_mentions: bool, user_mentions: Set[str], - room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 1d294f8798..54c91953e1 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -179,9 +179,10 @@ class ExperimentalConfig(Config): "msc3783_escape_event_match_key", False ) - # MSC3952: Intentional mentions - self.msc3952_intentional_mentions = experimental.get( - "msc3952_intentional_mentions", False + # MSC3952: Intentional mentions, this depends on MSC3758. + self.msc3952_intentional_mentions = ( + experimental.get("msc3952_intentional_mentions", False) + and self.msc3758_exact_event_match ) # MSC3959: Do not generate notifications for edits. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 2e917c90c4..5fc38431ba 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -400,7 +400,6 @@ class BulkPushRuleEvaluator: mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict) user_mentions: Set[str] = set() - room_mention = False if has_mentions: # mypy seems to have lost the type even though it must be a dict here. assert isinstance(mentions, dict) @@ -410,8 +409,6 @@ class BulkPushRuleEvaluator: user_mentions = set( filter(lambda item: isinstance(item, str), user_mentions_raw) ) - # Room mention is only true if the value is exactly true. - room_mention = mentions.get("room") is True evaluator = PushRuleEvaluator( _flatten_dict( @@ -420,7 +417,6 @@ class BulkPushRuleEvaluator: ), has_mentions, user_mentions, - room_mention, room_member_count, sender_power_level, notification_levels, diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 7567756135..199e3d7b70 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -227,7 +227,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) return len(result) > 0 - @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + @override_config( + { + "experimental_features": { + "msc3758_exact_event_match": True, + "msc3952_intentional_mentions": True, + } + } + ) def test_user_mentions(self) -> None: """Test the behavior of an event which includes invalid user mentions.""" bulk_evaluator = BulkPushRuleEvaluator(self.hs) @@ -323,7 +330,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) ) - @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + @override_config( + { + "experimental_features": { + "msc3758_exact_event_match": True, + "msc3952_intentional_mentions": True, + } + } + ) def test_room_mentions(self) -> None: """Test the behavior of an event which includes invalid room mentions.""" bulk_evaluator = BulkPushRuleEvaluator(self.hs) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 0554d247bc..d320a12f96 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -149,7 +149,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): *, has_mentions: bool = False, user_mentions: Optional[Set[str]] = None, - room_mention: bool = False, related_events: Optional[JsonDict] = None, ) -> PushRuleEvaluator: event = FrozenEvent( @@ -170,7 +169,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): _flatten_dict(event), has_mentions, user_mentions or set(), - room_mention, room_member_count, sender_power_level, cast(Dict[str, int], power_levels.get("notifications", {})), @@ -232,27 +230,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions # since the BulkPushRuleEvaluator is what handles data sanitisation. - def test_room_mentions(self) -> None: - """Check for room mentions.""" - condition = {"kind": "org.matrix.msc3952.is_room_mention"} - - # No room mention shouldn't match. - evaluator = self._get_evaluator({}, has_mentions=True) - self.assertFalse(evaluator.matches(condition, None, None)) - - # Room mention should match. - evaluator = self._get_evaluator({}, has_mentions=True, room_mention=True) - self.assertTrue(evaluator.matches(condition, None, None)) - - # A room mention and user mention is valid. - evaluator = self._get_evaluator( - {}, has_mentions=True, user_mentions={"@another:test"}, room_mention=True - ) - self.assertTrue(evaluator.matches(condition, None, None)) - - # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions - # since the BulkPushRuleEvaluator is what handles data sanitisation. - def _assert_matches( self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None ) -> None: -- cgit 1.5.1 From ffc2ee521d26f5b842df7902ade5de7a538e602d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 16 Feb 2023 16:09:11 +0000 Subject: Use mypy 1.0 (#15052) * Update mypy and mypy-zope * Remove unused ignores These used to suppress ``` synapse/storage/engines/__init__.py:28: error: "__new__" must return a class instance (got "NoReturn") [misc] ``` and ``` synapse/http/matrixfederationclient.py:1270: error: "BaseException" has no attribute "reasons" [attr-defined] ``` (note that we check `hasattr(e, "reasons")` above) * Avoid empty body warnings, sometimes by marking methods as abstract E.g. ``` tests/handlers/test_register.py:58: error: Missing return statement [empty-body] tests/handlers/test_register.py:108: error: Missing return statement [empty-body] ``` * Suppress false positive about `JaegerConfig` Complaint was ``` synapse/logging/opentracing.py:450: error: Function "Type[Config]" could always be true in boolean context [truthy-function] ``` * Fix not calling `is_state()` Oops! ``` tests/rest/client/test_third_party_rules.py:428: error: Function "Callable[[], bool]" could always be true in boolean context [truthy-function] ``` * Suppress false positives from ParamSpecs ```` synapse/logging/opentracing.py:971: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] synapse/logging/opentracing.py:1017: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] ```` * Drive-by improvement to `wrapping_logic` annotation * Workaround false "unreachable" positives See https://github.com/Shoobx/mypy-zope/issues/91 ``` tests/http/test_proxyagent.py:626: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:762: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:826: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:838: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:845: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:151: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:452: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:60: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:93: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:127: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:152: error: Statement is unreachable [unreachable] ``` * Changelog * Tweak DBAPI2 Protocol to be accepted by mypy 1.0 Some extra context in: - https://github.com/matrix-org/python-canonicaljson/pull/57 - https://github.com/python/mypy/issues/6002 - https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected * Pull in updated canonicaljson lib so the protocol check just works * Improve comments in opentracing I tried to workaround the ignores but found it too much trouble. I think the corresponding issue is https://github.com/python/mypy/issues/12909. The mypy repo has a PR claiming to fix this (https://github.com/python/mypy/pull/14677) which might mean this gets resolved soon? * Better annotation for INTERACTIVE_AUTH_CHECKERS * Drive-by AUTH_TYPE annotation, to remove an ignore --- changelog.d/15052.misc | 1 + poetry.lock | 69 ++++++++++---------- synapse/handlers/auth.py | 2 +- synapse/handlers/ui_auth/checkers.py | 18 ++++-- synapse/http/matrixfederationclient.py | 2 +- synapse/logging/opentracing.py | 24 +++++-- synapse/rest/media/v1/_base.py | 9 ++- synapse/storage/engines/__init__.py | 4 +- synapse/storage/types.py | 74 ++++++++++++++++++---- synapse/streams/__init__.py | 7 +- tests/handlers/test_register.py | 4 +- .../federation/test_matrix_federation_agent.py | 11 ++-- tests/http/test_proxyagent.py | 40 ++++++------ tests/logging/test_remote_handler.py | 17 ++--- tests/rest/client/test_auth.py | 3 + tests/rest/client/test_third_party_rules.py | 2 +- tests/utils.py | 26 +++++++- 17 files changed, 209 insertions(+), 104 deletions(-) create mode 100644 changelog.d/15052.misc (limited to 'synapse') diff --git a/changelog.d/15052.misc b/changelog.d/15052.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15052.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/poetry.lock b/poetry.lock index e534b30d2b..eb1e3d797b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -146,14 +146,14 @@ css = ["tinycss2 (>=1.1.0,<1.2)"] [[package]] name = "canonicaljson" -version = "1.6.4" +version = "1.6.5" description = "Canonical JSON" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "canonicaljson-1.6.4-py3-none-any.whl", hash = "sha256:55d282853b4245dbcd953fe54c39b91571813d7c44e1dbf66e3c4f97ff134a48"}, - {file = "canonicaljson-1.6.4.tar.gz", hash = "sha256:6c09b2119511f30eb1126cfcd973a10824e20f1cfd25039cde3d1218dd9c8d8f"}, + {file = "canonicaljson-1.6.5-py3-none-any.whl", hash = "sha256:806ea6f2cbb7405d20259e1c36dd1214ba5c242fa9165f5bd0bf2081f82c23fb"}, + {file = "canonicaljson-1.6.5.tar.gz", hash = "sha256:68dfc157b011e07d94bf74b5d4ccc01958584ed942d9dfd5fdd706609e81cd4b"}, ] [package.dependencies] @@ -1146,36 +1146,38 @@ files = [ [[package]] name = "mypy" -version = "0.981" +version = "1.0.0" description = "Optional static typing for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"}, - {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"}, - {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"}, - {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"}, - {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"}, - {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"}, - {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"}, - {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"}, - {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"}, - {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"}, - {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"}, - {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"}, - {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"}, - {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"}, - {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"}, - {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"}, - {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"}, - {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"}, - {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"}, - {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"}, - {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"}, - {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"}, - {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"}, - {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"}, + {file = "mypy-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0626db16705ab9f7fa6c249c017c887baf20738ce7f9129da162bb3075fc1af"}, + {file = "mypy-1.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ace23f6bb4aec4604b86c4843276e8fa548d667dbbd0cb83a3ae14b18b2db6c"}, + {file = "mypy-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87edfaf344c9401942883fad030909116aa77b0fa7e6e8e1c5407e14549afe9a"}, + {file = "mypy-1.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0ab090d9240d6b4e99e1fa998c2d0aa5b29fc0fb06bd30e7ad6183c95fa07593"}, + {file = "mypy-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:7cc2c01dfc5a3cbddfa6c13f530ef3b95292f926329929001d45e124342cd6b7"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14d776869a3e6c89c17eb943100f7868f677703c8a4e00b3803918f86aafbc52"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb2782a036d9eb6b5a6efcdda0986774bf798beef86a62da86cb73e2a10b423d"}, + {file = "mypy-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cfca124f0ac6707747544c127880893ad72a656e136adc935c8600740b21ff5"}, + {file = "mypy-1.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8845125d0b7c57838a10fd8925b0f5f709d0e08568ce587cc862aacce453e3dd"}, + {file = "mypy-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b1b9e1ed40544ef486fa8ac022232ccc57109f379611633ede8e71630d07d2"}, + {file = "mypy-1.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c7cf862aef988b5fbaa17764ad1d21b4831436701c7d2b653156a9497d92c83c"}, + {file = "mypy-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd187d92b6939617f1168a4fe68f68add749902c010e66fe574c165c742ed88"}, + {file = "mypy-1.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4e5175026618c178dfba6188228b845b64131034ab3ba52acaffa8f6c361f805"}, + {file = "mypy-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2f6ac8c87e046dc18c7d1d7f6653a66787a4555085b056fe2d599f1f1a2a2d21"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7306edca1c6f1b5fa0bc9aa645e6ac8393014fa82d0fa180d0ebc990ebe15964"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3cfad08f16a9c6611e6143485a93de0e1e13f48cfb90bcad7d5fde1c0cec3d36"}, + {file = "mypy-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67cced7f15654710386e5c10b96608f1ee3d5c94ca1da5a2aad5889793a824c1"}, + {file = "mypy-1.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a86b794e8a56ada65c573183756eac8ac5b8d3d59daf9d5ebd72ecdbb7867a43"}, + {file = "mypy-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:50979d5efff8d4135d9db293c6cb2c42260e70fb010cbc697b1311a4d7a39ddb"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ae4c7a99e5153496243146a3baf33b9beff714464ca386b5f62daad601d87af"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e398652d005a198a7f3c132426b33c6b85d98aa7dc852137a2a3be8890c4072"}, + {file = "mypy-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be78077064d016bc1b639c2cbcc5be945b47b4261a4f4b7d8923f6c69c5c9457"}, + {file = "mypy-1.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92024447a339400ea00ac228369cd242e988dd775640755fa4ac0c126e49bb74"}, + {file = "mypy-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fe523fcbd52c05040c7bee370d66fee8373c5972171e4fbc323153433198592d"}, + {file = "mypy-1.0.0-py3-none-any.whl", hash = "sha256:2efa963bdddb27cb4a0d42545cd137a8d2b883bd181bbc4525b568ef6eca258f"}, + {file = "mypy-1.0.0.tar.gz", hash = "sha256:f34495079c8d9da05b183f9f7daec2878280c2ad7cc81da686ef0b484cea2ecf"}, ] [package.dependencies] @@ -1186,6 +1188,7 @@ typing-extensions = ">=3.10" [package.extras] dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] python2 = ["typed-ast (>=1.4.0,<2)"] reports = ["lxml"] @@ -1203,18 +1206,18 @@ files = [ [[package]] name = "mypy-zope" -version = "0.3.11" +version = "0.9.0" description = "Plugin for mypy to support zope interfaces" category = "dev" optional = false python-versions = "*" files = [ - {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"}, - {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"}, + {file = "mypy-zope-0.9.0.tar.gz", hash = "sha256:88bf6cd056e38b338e6956055958a7805b4ff84404ccd99e29883a3647a1aeb3"}, + {file = "mypy_zope-0.9.0-py3-none-any.whl", hash = "sha256:e1bb4b57084f76ff8a154a3e07880a1af2ac6536c491dad4b143d529f72c5d15"}, ] [package.dependencies] -mypy = "0.981" +mypy = "1.0.0" "zope.interface" = "*" "zope.schema" = "*" @@ -1705,7 +1708,7 @@ files = [ cffi = ">=1.4.1" [package.extras] -docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +docs = ["sphinx (>=1.6.5)", "sphinx_rtd_theme"] tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] [[package]] diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 57a6854b1e..cf12b55d21 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -201,7 +201,7 @@ class AuthHandler: for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: inst = auth_checker_class(hs) if inst.is_enabled(): - self.checkers[inst.AUTH_TYPE] = inst # type: ignore + self.checkers[inst.AUTH_TYPE] = inst self.bcrypt_rounds = hs.config.registration.bcrypt_rounds diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 332edcca24..78a75bfed6 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -13,7 +13,8 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Any +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type from twisted.web.client import PartialDownloadError @@ -27,19 +28,28 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class UserInteractiveAuthChecker: +class UserInteractiveAuthChecker(ABC): """Abstract base class for an interactive auth checker""" - def __init__(self, hs: "HomeServer"): + # This should really be an "abstract class property", i.e. it should + # be an error to instantiate a subclass that doesn't specify an AUTH_TYPE. + # But calling this a `ClassVar` is simpler than a decorator stack of + # @property @abstractmethod and @classmethod (if that's even the right order). + AUTH_TYPE: ClassVar[str] + + def __init__(self, hs: "HomeServer"): # noqa: B027 pass + @abstractmethod def is_enabled(self) -> bool: """Check if the configuration of the homeserver allows this checker to work Returns: True if this login type is enabled. """ + raise NotImplementedError() + @abstractmethod async def check_auth(self, authdict: dict, clientip: str) -> Any: """Given the authentication dict from the client, attempt to check this step @@ -304,7 +314,7 @@ class RegistrationTokenAuthChecker(UserInteractiveAuthChecker): ) -INTERACTIVE_AUTH_CHECKERS = [ +INTERACTIVE_AUTH_CHECKERS: Sequence[Type[UserInteractiveAuthChecker]] = [ DummyAuthChecker, TermsAuthChecker, RecaptchaAuthChecker, diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b92f1d3d1a..312aab4dcc 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1267,7 +1267,7 @@ class MatrixFederationHttpClient: def _flatten_response_never_received(e: BaseException) -> str: if hasattr(e, "reasons"): reasons = ", ".join( - _flatten_response_never_received(f.value) for f in e.reasons # type: ignore[attr-defined] + _flatten_response_never_received(f.value) for f in e.reasons ) return "%s:[%s]" % (type(e).__name__, reasons) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 6c7cf1b294..5aed71262f 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -188,7 +188,7 @@ from typing import ( ) import attr -from typing_extensions import ParamSpec +from typing_extensions import Concatenate, ParamSpec from twisted.internet import defer from twisted.web.http import Request @@ -445,7 +445,7 @@ def init_tracer(hs: "HomeServer") -> None: opentracing = None # type: ignore[assignment] return - if not opentracing or not JaegerConfig: + if opentracing is None or JaegerConfig is None: raise ConfigError( "The server has been configured to use opentracing but opentracing is not " "installed." @@ -872,7 +872,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte def _custom_sync_async_decorator( func: Callable[P, R], - wrapping_logic: Callable[[Callable[P, R], Any, Any], ContextManager[None]], + wrapping_logic: Callable[Concatenate[Callable[P, R], P], ContextManager[None]], ) -> Callable[P, R]: """ Decorates a function that is sync or async (coroutines), or that returns a Twisted @@ -902,10 +902,14 @@ def _custom_sync_async_decorator( """ if inspect.iscoroutinefunction(func): - + # In this branch, R = Awaitable[RInner], for some other type RInner @wraps(func) - async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + async def _wrapper( + *args: P.args, **kwargs: P.kwargs + ) -> Any: # Return type is RInner with wrapping_logic(func, *args, **kwargs): + # type-ignore: func() returns R, but mypy doesn't know that R is + # Awaitable here. return await func(*args, **kwargs) # type: ignore[misc] else: @@ -972,7 +976,11 @@ def trace_with_opname( if not opentracing: return func - return _custom_sync_async_decorator(func, _wrapping_logic) + # type-ignore: mypy seems to be confused by the ParamSpecs here. + # I think the problem is https://github.com/python/mypy/issues/12909 + return _custom_sync_async_decorator( + func, _wrapping_logic # type: ignore[arg-type] + ) return _decorator @@ -1018,7 +1026,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield - return _custom_sync_async_decorator(func, _wrapping_logic) + # type-ignore: mypy seems to be confused by the ParamSpecs here. + # I think the problem is https://github.com/python/mypy/issues/12909 + return _custom_sync_async_decorator(func, _wrapping_logic) # type: ignore[arg-type] @contextlib.contextmanager diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index d30878f704..6e035afcce 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os import urllib +from abc import ABC, abstractmethod from types import TracebackType from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type @@ -284,13 +285,14 @@ async def respond_with_responder( finish_request(request) -class Responder: +class Responder(ABC): """Represents a response that can be streamed to the requester. Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + @abstractmethod def write_to_consumer(self, consumer: IConsumer) -> Awaitable: """Stream response into consumer @@ -300,11 +302,12 @@ class Responder: Returns: Resolves once the response has finished being written """ + raise NotImplementedError() - def __enter__(self) -> None: + def __enter__(self) -> None: # noqa: B027 pass - def __exit__( + def __exit__( # noqa: B027 self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index a182e8a098..d1ccb7390a 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -25,7 +25,7 @@ try: except ImportError: class PostgresEngine(BaseDatabaseEngine): # type: ignore[no-redef] - def __new__(cls, *args: object, **kwargs: object) -> NoReturn: # type: ignore[misc] + def __new__(cls, *args: object, **kwargs: object) -> NoReturn: raise RuntimeError( f"Cannot create {cls.__name__} -- psycopg2 module is not installed" ) @@ -36,7 +36,7 @@ try: except ImportError: class Sqlite3Engine(BaseDatabaseEngine): # type: ignore[no-redef] - def __new__(cls, *args: object, **kwargs: object) -> NoReturn: # type: ignore[misc] + def __new__(cls, *args: object, **kwargs: object) -> NoReturn: raise RuntimeError( f"Cannot create {cls.__name__} -- sqlite3 module is not installed" ) diff --git a/synapse/storage/types.py b/synapse/storage/types.py index 0031df1e06..56a0048539 100644 --- a/synapse/storage/types.py +++ b/synapse/storage/types.py @@ -12,7 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. from types import TracebackType -from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import ( + Any, + Callable, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Type, + Union, +) from typing_extensions import Protocol @@ -112,15 +123,35 @@ class DBAPI2Module(Protocol): # extends from this hierarchy. See # https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#exceptions # https://www.postgresql.org/docs/current/errcodes-appendix.html#ERRCODES-TABLE - Warning: Type[Exception] - Error: Type[Exception] + # + # Note: rather than + # x: T + # we write + # @property + # def x(self) -> T: ... + # which expresses that the protocol attribute `x` is read-only. The mypy docs + # https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected + # explain why this is necessary for safety. TL;DR: we shouldn't be able to write + # to `x`, only read from it. See also https://github.com/python/mypy/issues/6002 . + @property + def Warning(self) -> Type[Exception]: + ... + + @property + def Error(self) -> Type[Exception]: + ... # Errors are divided into `InterfaceError`s (something went wrong in the database # driver) and `DatabaseError`s (something went wrong in the database). These are # both subclasses of `Error`, but we can't currently express this in type # annotations due to https://github.com/python/mypy/issues/8397 - InterfaceError: Type[Exception] - DatabaseError: Type[Exception] + @property + def InterfaceError(self) -> Type[Exception]: + ... + + @property + def DatabaseError(self) -> Type[Exception]: + ... # Everything below is a subclass of `DatabaseError`. @@ -128,7 +159,9 @@ class DBAPI2Module(Protocol): # - An integer was too big for its data type. # - An invalid date time was provided. # - A string contained a null code point. - DataError: Type[Exception] + @property + def DataError(self) -> Type[Exception]: + ... # Roughly: something went wrong in the database, but it's not within the application # programmer's control. Examples: @@ -138,28 +171,45 @@ class DBAPI2Module(Protocol): # - A serialisation failure occurred. # - The database ran out of resources, such as storage, memory, connections, etc. # - The database encountered an error from the operating system. - OperationalError: Type[Exception] + @property + def OperationalError(self) -> Type[Exception]: + ... # Roughly: we've given the database data which breaks a rule we asked it to enforce. # Examples: # - Stop, criminal scum! You violated the foreign key constraint # - Also check constraints, non-null constraints, etc. - IntegrityError: Type[Exception] + @property + def IntegrityError(self) -> Type[Exception]: + ... # Roughly: something went wrong within the database server itself. - InternalError: Type[Exception] + @property + def InternalError(self) -> Type[Exception]: + ... # Roughly: the application did something silly that needs to be fixed. Examples: # - We don't have permissions to do something. # - We tried to create a table with duplicate column names. # - We tried to use a reserved name. # - We referred to a column that doesn't exist. - ProgrammingError: Type[Exception] + @property + def ProgrammingError(self) -> Type[Exception]: + ... # Roughly: we've tried to do something that this database doesn't support. - NotSupportedError: Type[Exception] + @property + def NotSupportedError(self) -> Type[Exception]: + ... - def connect(self, **parameters: object) -> Connection: + # We originally wrote + # def connect(self, *args, **kwargs) -> Connection: ... + # But mypy doesn't seem to like that because sqlite3.connect takes a mandatory + # positional argument. We can't make that part of the signature though, because + # psycopg2.connect doesn't have a mandatory positional argument. Instead, we use + # the following slightly unusual workaround. + @property + def connect(self) -> Callable[..., Connection]: ... diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py index c6c8a0315c..8a48ffc48d 100644 --- a/synapse/streams/__init__.py +++ b/synapse/streams/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from abc import ABC, abstractmethod from typing import Generic, List, Optional, Tuple, TypeVar from synapse.types import StrCollection, UserID @@ -22,7 +22,8 @@ K = TypeVar("K") R = TypeVar("R") -class EventSource(Generic[K, R]): +class EventSource(ABC, Generic[K, R]): + @abstractmethod async def get_new_events( self, user: UserID, @@ -32,4 +33,4 @@ class EventSource(Generic[K, R]): is_guest: bool, explicit_room_id: Optional[str] = None, ) -> Tuple[List[R], K]: - ... + raise NotImplementedError() diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 782ef09cf4..1db99b3c00 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -62,7 +62,7 @@ class TestSpamChecker: request_info: Collection[Tuple[str, str]], auth_provider_id: Optional[str], ) -> RegistrationBehaviour: - pass + return RegistrationBehaviour.ALLOW class DenyAll(TestSpamChecker): @@ -111,7 +111,7 @@ class TestLegacyRegistrationSpamChecker: username: Optional[str], request_info: Collection[Tuple[str, str]], ) -> RegistrationBehaviour: - pass + return RegistrationBehaviour.ALLOW class LegacyAllowAll(TestLegacyRegistrationSpamChecker): diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index acfdcd3bca..d27422515c 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -63,7 +63,7 @@ from tests.http import ( get_test_ca_cert_file, ) from tests.server import FakeTransport, ThreadedMemoryReactorClock -from tests.utils import default_config +from tests.utils import checked_cast, default_config logger = logging.getLogger(__name__) @@ -146,8 +146,10 @@ class MatrixFederationAgentTests(unittest.TestCase): # # Normally this would be done by the TCP socket code in Twisted, but we are # stubbing that out here. - client_protocol = client_factory.buildProtocol(dummy_address) - assert isinstance(client_protocol, _WrappingProtocol) + # NB: we use a checked_cast here to workaround https://github.com/Shoobx/mypy-zope/issues/91) + client_protocol = checked_cast( + _WrappingProtocol, client_factory.buildProtocol(dummy_address) + ) client_protocol.makeConnection( FakeTransport(server_protocol, self.reactor, client_protocol) ) @@ -446,7 +448,6 @@ class MatrixFederationAgentTests(unittest.TestCase): server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() ).buildProtocol(dummy_address) - assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. proxy_server_transport = proxy_server.transport @@ -1529,7 +1530,7 @@ def _check_logcontext(context: LoggingContextOrSentinel) -> None: def _wrap_server_factory_for_tls( factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None -) -> IProtocolFactory: +) -> TLSMemoryBIOFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory The resultant factory will create a TLS server which presents a certificate signed by our test CA, valid for the domains in `sanlist` diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index a817940730..22fdc7f5f2 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -43,6 +43,7 @@ from tests.http import ( ) from tests.server import FakeTransport, ThreadedMemoryReactorClock from tests.unittest import TestCase +from tests.utils import checked_cast logger = logging.getLogger(__name__) @@ -620,7 +621,6 @@ class MatrixFederationAgentTests(TestCase): server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() ).buildProtocol(dummy_address) - assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. proxy_server_transport = proxy_server.transport @@ -757,12 +757,14 @@ class MatrixFederationAgentTests(TestCase): assert isinstance(proxy_server, HTTPChannel) # fish the transports back out so that we can do the old switcheroo - s2c_transport = proxy_server.transport - assert isinstance(s2c_transport, FakeTransport) - client_protocol = s2c_transport.other - assert isinstance(client_protocol, _WrappingProtocol) - c2s_transport = client_protocol.transport - assert isinstance(c2s_transport, FakeTransport) + # To help mypy out with the various Protocols and wrappers and mocks, we do + # some explicit casting. Without the casts, we hit the bug I reported at + # https://github.com/Shoobx/mypy-zope/issues/91 . + # We also double-checked these casts at runtime (test-time) because I found it + # quite confusing to deduce these types in the first place! + s2c_transport = checked_cast(FakeTransport, proxy_server.transport) + client_protocol = checked_cast(_WrappingProtocol, s2c_transport.other) + c2s_transport = checked_cast(FakeTransport, client_protocol.transport) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -822,9 +824,9 @@ class MatrixFederationAgentTests(TestCase): @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) def test_proxy_with_no_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) - self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") - self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) + proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._hostStr, "proxy.com") + self.assertEqual(proxy_ep._port, 8888) @patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"}) def test_proxy_with_unsupported_scheme(self) -> None: @@ -834,25 +836,21 @@ class MatrixFederationAgentTests(TestCase): @patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"}) def test_proxy_with_http_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) - self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") - self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) + proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._hostStr, "proxy.com") + self.assertEqual(proxy_ep._port, 8888) @patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"}) def test_proxy_with_https_scheme(self) -> None: https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint) - self.assertEqual( - https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com" - ) - self.assertEqual( - https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._port, 8888 - ) + proxy_ep = checked_cast(_WrapperEndpoint, https_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._wrappedEndpoint._hostStr, "proxy.com") + self.assertEqual(proxy_ep._wrappedEndpoint._port, 8888) def _wrap_server_factory_for_tls( factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None -) -> IProtocolFactory: +) -> TLSMemoryBIOFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory The resultant factory will create a TLS server which presents a certificate diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py index c08954d887..5191e31a8a 100644 --- a/tests/logging/test_remote_handler.py +++ b/tests/logging/test_remote_handler.py @@ -21,6 +21,7 @@ from synapse.logging import RemoteHandler from tests.logging import LoggerCleanupMixin from tests.server import FakeTransport, get_clock from tests.unittest import TestCase +from tests.utils import checked_cast def connect_logging_client( @@ -56,8 +57,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): client, server = connect_logging_client(self.reactor, 0) # Trigger data being sent - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # One log message, with a single trailing newline logs = server.data.decode("utf8").splitlines() @@ -89,8 +90,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # Only the 7 infos made it through, the debugs were elided logs = server.data.splitlines() @@ -123,8 +124,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # The 10 warnings made it through, the debugs and infos were elided logs = server.data.splitlines() @@ -148,8 +149,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # The first five and last five warnings made it through, the debugs and # infos were elided diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 208ec44829..f4e1e7de43 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -43,6 +43,9 @@ class DummyRecaptchaChecker(UserInteractiveAuthChecker): super().__init__(hs) self.recaptcha_attempts: List[Tuple[dict, str]] = [] + def is_enabled(self) -> bool: + return True + def check_auth(self, authdict: dict, clientip: str) -> Any: self.recaptcha_attempts.append((authdict, clientip)) return succeed(True) diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 3325d43a2f..5fa3440691 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -425,7 +425,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): async def test_fn( event: EventBase, state_events: StateMap[EventBase] ) -> Tuple[bool, Optional[JsonDict]]: - if event.is_state and event.type == EventTypes.PowerLevels: + if event.is_state() and event.type == EventTypes.PowerLevels: await api.create_and_send_event_into_room( { "room_id": event.room_id, diff --git a/tests/utils.py b/tests/utils.py index 15fabbc2d0..a0ac11bc5c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -15,7 +15,7 @@ import atexit import os -from typing import Any, Callable, Dict, List, Tuple, Union, overload +from typing import Any, Callable, Dict, List, Tuple, Type, TypeVar, Union, overload import attr from typing_extensions import Literal, ParamSpec @@ -341,3 +341,27 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: context = await unpersisted_context.persist(event) await persistence_store.persist_event(event, context) + + +T = TypeVar("T") + + +def checked_cast(type: Type[T], x: object) -> T: + """A version of typing.cast that is checked at runtime. + + We have our own function for this for two reasons: + + 1. typing.cast itself is deliberately a no-op at runtime, see + https://docs.python.org/3/library/typing.html#typing.cast + 2. To help workaround a mypy-zope bug https://github.com/Shoobx/mypy-zope/issues/91 + where mypy would erroneously consider `isinstance(x, type)` to be false in all + circumstances. + + For this to make sense, `T` needs to be something that `isinstance` can check; see + https://docs.python.org/3/library/functions.html?highlight=isinstance#isinstance + https://docs.python.org/3/glossary.html#term-abstract-base-class + https://docs.python.org/3/library/typing.html#typing.runtime_checkable + for more details. + """ + assert isinstance(x, type) + return x -- cgit 1.5.1 From 4f4f27e57fdab1d7cc6e275b8acabc785952205e Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 17 Feb 2023 09:40:32 +0000 Subject: Mitigate a race where /make_join could 403 for restricted rooms (#15080) Previously, when creating a join event in /make_join, we would decide whether to include additional fields to satisfy restricted room checks based on the current state of the room. Then, when building the event, we would capture the forward extremities of the room to use as prev events. This is subject to race conditions. For example, when leaving and rejoining a room, the following sequence of events leads to a misleading 403 response: 1. /make_join reads the current state of the room and sees that the user is still in the room. It decides to omit the field required for restricted room joins. 2. The leave event is persisted and the room's forward extremities are updated. 3. /make_join builds the event, using the post-leave forward extremities. The event then fails the restricted room checks. To mitigate the race, we move the read of the forward extremities closer to the read of the current state. Ideally, we would compute the state based off the chosen prev events, but that can involve state resolution, which is expensive. Signed-off-by: Sean Quah --- changelog.d/15080.bugfix | 1 + synapse/handlers/federation.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15080.bugfix (limited to 'synapse') diff --git a/changelog.d/15080.bugfix b/changelog.d/15080.bugfix new file mode 100644 index 0000000000..965d0b921e --- /dev/null +++ b/changelog.d/15080.bugfix @@ -0,0 +1 @@ +Reduce the likelihood of a rare race condition where rejoining a restricted room over federation would fail. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1d0f6bcd6f..5f2057269d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -952,7 +952,20 @@ class FederationHandler: # # Note that this requires the /send_join request to come back to the # same server. + prev_event_ids = None if room_version.msc3083_join_rules: + # Note that the room's state can change out from under us and render our + # nice join rules-conformant event non-conformant by the time we build the + # event. When this happens, our validation at the end fails and we respond + # to the requesting server with a 403, which is misleading — it indicates + # that the user is not allowed to join the room and the joining server + # should not bother retrying via this homeserver or any others, when + # in fact we've just messed up with building the event. + # + # To reduce the likelihood of this race, we capture the forward extremities + # of the room (prev_event_ids) just before fetching the current state, and + # hope that the state we fetch corresponds to the prev events we chose. + prev_event_ids = await self.store.get_prev_events_for_room(room_id) state_ids = await self._state_storage_controller.get_current_state_ids( room_id ) @@ -994,7 +1007,8 @@ class FederationHandler: event, unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( - builder=builder + builder=builder, + prev_event_ids=prev_event_ids, ) except SynapseError as e: logger.warning("Failed to create join to %s because %s", room_id, e) -- cgit 1.5.1 From 61bfcd669ae596a8df940f434e3e2335059100b1 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 17 Feb 2023 14:54:55 +0100 Subject: Add account data to export command (#14969) * Add account data to to export command * newsfile * remove not needed function * update newsfile * adopt #14973 --- changelog.d/14969.feature | 1 + docs/usage/administration/admin_faq.md | 3 +++ synapse/app/admin_cmd.py | 15 ++++++++++- synapse/handlers/admin.py | 49 +++++++++++++++++++++++----------- tests/handlers/test_admin.py | 27 +++++++++++++++++++ 5 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 changelog.d/14969.feature (limited to 'synapse') diff --git a/changelog.d/14969.feature b/changelog.d/14969.feature new file mode 100644 index 0000000000..a4680ef9c8 --- /dev/null +++ b/changelog.d/14969.feature @@ -0,0 +1 @@ +Add account data to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.78/usage/administration/admin_faq.html#how-can-i-export-user-data). \ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 7a27741199..925e1d175e 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -71,6 +71,9 @@ output-directory │ ├───invite_state │ └───knock_state └───user_data + ├───account_data + │ ├───global + │ └─── ├───connections ├───devices └───profile diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index fe7afb9475..ad51f33165 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -17,7 +17,7 @@ import logging import os import sys import tempfile -from typing import List, Optional +from typing import List, Mapping, Optional from twisted.internet import defer, task @@ -222,6 +222,19 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(connection_file, "a") as f: print(json.dumps(connection), file=f) + def write_account_data( + self, file_name: str, account_data: Mapping[str, JsonDict] + ) -> None: + account_data_directory = os.path.join( + self.base_directory, "user_data", "account_data" + ) + os.makedirs(account_data_directory, exist_ok=True) + + account_data_file = os.path.join(account_data_directory, file_name) + + with open(account_data_file, "a") as f: + print(json.dumps(account_data), file=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index b03c214b14..8b7760b2cc 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -14,7 +14,7 @@ import abc import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set from synapse.api.constants import Direction, Membership from synapse.events import EventBase @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) class AdminHandler: def __init__(self, hs: "HomeServer"): - self.store = hs.get_datastores().main + self._store = hs.get_datastores().main self._device_handler = hs.get_device_handler() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state @@ -38,7 +38,7 @@ class AdminHandler: async def get_whois(self, user: UserID) -> JsonDict: connections = [] - sessions = await self.store.get_user_ip_and_agents(user) + sessions = await self._store.get_user_ip_and_agents(user) for session in sessions: connections.append( { @@ -57,7 +57,7 @@ class AdminHandler: async def get_user(self, user: UserID) -> Optional[JsonDict]: """Function to get user details""" - user_info_dict = await self.store.get_user_by_id(user.to_string()) + user_info_dict = await self._store.get_user_by_id(user.to_string()) if user_info_dict is None: return None @@ -89,11 +89,11 @@ class AdminHandler: } # Add additional user metadata - profile = await self.store.get_profileinfo(user.localpart) - threepids = await self.store.user_get_threepids(user.to_string()) + profile = await self._store.get_profileinfo(user.localpart) + threepids = await self._store.user_get_threepids(user.to_string()) external_ids = [ ({"auth_provider": auth_provider, "external_id": external_id}) - for auth_provider, external_id in await self.store.get_external_ids_by_user( + for auth_provider, external_id in await self._store.get_external_ids_by_user( user.to_string() ) ] @@ -101,7 +101,7 @@ class AdminHandler: user_info_dict["avatar_url"] = profile.avatar_url user_info_dict["threepids"] = threepids user_info_dict["external_ids"] = external_ids - user_info_dict["erased"] = await self.store.is_user_erased(user.to_string()) + user_info_dict["erased"] = await self._store.is_user_erased(user.to_string()) return user_info_dict @@ -117,7 +117,7 @@ class AdminHandler: The returned value is that returned by `writer.finished()`. """ # Get all rooms the user is in or has been in - rooms = await self.store.get_rooms_for_local_user_where_membership_is( + rooms = await self._store.get_rooms_for_local_user_where_membership_is( user_id, membership_list=( Membership.JOIN, @@ -131,7 +131,7 @@ class AdminHandler: # We only try and fetch events for rooms the user has been in. If # they've been e.g. invited to a room without joining then we handle # those separately. - rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id) + rooms_user_has_been_in = await self._store.get_rooms_user_has_been_in(user_id) for index, room in enumerate(rooms): room_id = room.room_id @@ -140,7 +140,7 @@ class AdminHandler: "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms) ) - forgotten = await self.store.did_forget(user_id, room_id) + forgotten = await self._store.did_forget(user_id, room_id) if forgotten: logger.info("[%s] User forgot room %d, ignoring", user_id, room_id) continue @@ -152,14 +152,14 @@ class AdminHandler: if room.membership == Membership.INVITE: event_id = room.event_id - invite = await self.store.get_event(event_id, allow_none=True) + invite = await self._store.get_event(event_id, allow_none=True) if invite: invited_state = invite.unsigned["invite_room_state"] writer.write_invite(room_id, invite, invited_state) if room.membership == Membership.KNOCK: event_id = room.event_id - knock = await self.store.get_event(event_id, allow_none=True) + knock = await self._store.get_event(event_id, allow_none=True) if knock: knock_state = knock.unsigned["knock_room_state"] writer.write_knock(room_id, knock, knock_state) @@ -170,7 +170,7 @@ class AdminHandler: # were joined. We estimate that point by looking at the # stream_ordering of the last membership if it wasn't a join. if room.membership == Membership.JOIN: - stream_ordering = self.store.get_room_max_stream_ordering() + stream_ordering = self._store.get_room_max_stream_ordering() else: stream_ordering = room.stream_ordering @@ -197,7 +197,7 @@ class AdminHandler: # events that we have and then filtering, this isn't the most # efficient method perhaps but it does guarantee we get everything. while True: - events, _ = await self.store.paginate_room_events( + events, _ = await self._store.paginate_room_events( room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS ) if not events: @@ -263,6 +263,13 @@ class AdminHandler: connections["devices"][""]["sessions"][0]["connections"] ) + # Get all account data the user has global and in rooms + global_data = await self._store.get_global_account_data_for_user(user_id) + by_room_data = await self._store.get_room_account_data_for_user(user_id) + writer.write_account_data("global", global_data) + for room_id in by_room_data: + writer.write_account_data(room_id, by_room_data[room_id]) + return writer.finished() @@ -340,6 +347,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): """ raise NotImplementedError() + @abc.abstractmethod + def write_account_data( + self, file_name: str, account_data: Mapping[str, JsonDict] + ) -> None: + """Write the account data of a user. + + Args: + file_name: file name to write data + account_data: mapping of global or room account_data + """ + raise NotImplementedError() + @abc.abstractmethod def finished(self) -> Any: """Called when all data has successfully been exported and written. diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index 6f300b8e11..1b97aaeed1 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -296,3 +296,30 @@ class ExfiltrateData(unittest.HomeserverTestCase): self.assertEqual(args[0][0]["user_agent"], "user_agent") self.assertGreater(args[0][0]["last_seen"], 0) self.assertNotIn("access_token", args[0][0]) + + def test_account_data(self) -> None: + """Tests that user account data get exported.""" + # add account data + self.get_success( + self._store.add_account_data_for_user(self.user2, "m.global", {"a": 1}) + ) + self.get_success( + self._store.add_account_data_to_room( + self.user2, "test_room", "m.per_room", {"b": 2} + ) + ) + + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + # two calls, one call for user data and one call for room data + writer.write_account_data.assert_called() + + args = writer.write_account_data.call_args_list[0][0] + self.assertEqual(args[0], "global") + self.assertEqual(args[1]["m.global"]["a"], 1) + + args = writer.write_account_data.call_args_list[1][0] + self.assertEqual(args[0], "test_room") + self.assertEqual(args[1]["m.per_room"]["b"], 2) -- cgit 1.5.1 From 1cbc3f197cc1b9732649ffb769b05d90c0e904d7 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 20 Feb 2023 12:00:18 +0000 Subject: Fix a bug introduced in Synapse v1.74.0 where searching with colons when using ICU for search term tokenisation would fail with an error. (#15079) Co-authored-by: David Robertson --- changelog.d/15079.bugfix | 1 + synapse/storage/databases/main/user_directory.py | 24 +++++++-- tests/handlers/test_user_directory.py | 7 +++ tests/storage/test_user_directory.py | 63 +++++++++++++++++++++++- 4 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 changelog.d/15079.bugfix (limited to 'synapse') diff --git a/changelog.d/15079.bugfix b/changelog.d/15079.bugfix new file mode 100644 index 0000000000..907892c1ef --- /dev/null +++ b/changelog.d/15079.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse v1.74.0 where searching with colons when using ICU for search term tokenisation would fail with an error. \ No newline at end of file diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index f6a6fd4079..30af4b3b6c 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -918,11 +918,19 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]: We use this so that we can add prefix matching, which isn't something that is supported by default. """ - results = _parse_words(search_term) + escaped_words = [] + for word in _parse_words(search_term): + # Postgres tsvector and tsquery quoting rules: + # words potentially containing punctuation should be quoted + # and then existing quotes and backslashes should be doubled + # See: https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY + + quoted_word = word.replace("'", "''").replace("\\", "\\\\") + escaped_words.append(f"'{quoted_word}'") - both = " & ".join("(%s:* | %s)" % (result, result) for result in results) - exact = " & ".join("%s" % (result,) for result in results) - prefix = " & ".join("%s:*" % (result,) for result in results) + both = " & ".join("(%s:* | %s)" % (word, word) for word in escaped_words) + exact = " & ".join("%s" % (word,) for word in escaped_words) + prefix = " & ".join("%s:*" % (word,) for word in escaped_words) return both, exact, prefix @@ -944,6 +952,14 @@ def _parse_words(search_term: str) -> List[str]: if USE_ICU: return _parse_words_with_icu(search_term) + return _parse_words_with_regex(search_term) + + +def _parse_words_with_regex(search_term: str) -> List[str]: + """ + Break down search term into words, when we don't have ICU available. + See: `_parse_words` + """ return re.findall(r"([\w\-]+)", search_term, re.UNICODE) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index f65a68b9c2..a02c1c6227 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -192,6 +192,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.join(room, self.appservice.sender, tok=self.appservice.token) self._check_only_one_user_in_directory(user, room) + def test_search_term_with_colon_in_it_does_not_raise(self) -> None: + """ + Regression test: Test that search terms with colons in them are acceptable. + """ + u1 = self.register_user("user1", "pass") + self.get_success(self.handler.search_users(u1, "haha:paamayim-nekudotayim", 10)) + def test_user_not_in_users_table(self) -> None: """Unclear how it happens, but on matrix.org we've seen join events for users who aren't in the users table. Test that we don't fall over diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index f1ca523d23..2d169684cf 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -25,6 +25,11 @@ from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.background_updates import _BackgroundUpdateHandler +from synapse.storage.databases.main import user_directory +from synapse.storage.databases.main.user_directory import ( + _parse_words_with_icu, + _parse_words_with_regex, +) from synapse.storage.roommember import ProfileInfo from synapse.util import Clock @@ -42,7 +47,7 @@ ALICE = "@alice:a" BOB = "@bob:b" BOBBY = "@bobby:a" # The localpart isn't 'Bela' on purpose so we can test looking up display names. -BELA = "@somenickname:a" +BELA = "@somenickname:example.org" class GetUserDirectoryTables: @@ -423,6 +428,8 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): class UserDirectoryStoreTestCase(HomeserverTestCase): + use_icu = False + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main @@ -434,6 +441,12 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): self.get_success(self.store.update_profile_in_user_dir(BELA, "Bela", None)) self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB))) + self._restore_use_icu = user_directory.USE_ICU + user_directory.USE_ICU = self.use_icu + + def tearDown(self) -> None: + user_directory.USE_ICU = self._restore_use_icu + def test_search_user_dir(self) -> None: # normally when alice searches the directory she should just find # bob because bobby doesn't share a room with her. @@ -478,6 +491,26 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_start_of_user_id(self) -> None: + """Tests that a user can look up another user by searching for the start + of their user ID. + """ + r = self.get_success(self.store.search_user_dir(ALICE, "somenickname:exa", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, + ) + + +class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase): + use_icu = True + + if not icu: + skip = "Requires PyICU" + class UserDirectoryICUTestCase(HomeserverTestCase): if not icu: @@ -513,3 +546,31 @@ class UserDirectoryICUTestCase(HomeserverTestCase): r["results"][0], {"user_id": ALICE, "display_name": display_name, "avatar_url": None}, ) + + def test_icu_word_boundary_punctuation(self) -> None: + """ + Tests the behaviour of punctuation with the ICU tokeniser. + + Seems to depend on underlying version of ICU. + """ + + # Note: either tokenisation is fine, because Postgres actually splits + # words itself afterwards. + self.assertIn( + _parse_words_with_icu("lazy'fox jumped:over the.dog"), + ( + # ICU 66 on Ubuntu 20.04 + ["lazy'fox", "jumped", "over", "the", "dog"], + # ICU 70 on Ubuntu 22.04 + ["lazy'fox", "jumped:over", "the.dog"], + ), + ) + + def test_regex_word_boundary_punctuation(self) -> None: + """ + Tests the behaviour of punctuation with the non-ICU tokeniser + """ + self.assertEqual( + _parse_words_with_regex("lazy'fox jumped:over the.dog"), + ["lazy", "fox", "jumped", "over", "the", "dog"], + ) -- cgit 1.5.1 From 490a3675bd7225b5695e505fea225d7c30127551 Mon Sep 17 00:00:00 2001 From: realtyem Date: Mon, 20 Feb 2023 06:23:00 -0600 Subject: Allow health listener resource to load (#15096) * Allow health listener resource to load. * changelog * Update changelog.d/15096.bugfix --- changelog.d/15096.bugfix | 1 + synapse/config/server.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/15096.bugfix (limited to 'synapse') diff --git a/changelog.d/15096.bugfix b/changelog.d/15096.bugfix new file mode 100644 index 0000000000..09b4d861f8 --- /dev/null +++ b/changelog.d/15096.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.76 where workers would fail to start if the `health` listener was configured. diff --git a/synapse/config/server.py b/synapse/config/server.py index ecdaa2d9dd..d4ef9930b0 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -177,6 +177,7 @@ KNOWN_RESOURCES = { "client", "consent", "federation", + "health", "keys", "media", "metrics", -- cgit 1.5.1 From e26d7d5ae786df8d9d9a4dbd0f734e5c2f08aafd Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Feb 2023 13:35:24 +0000 Subject: Teach portdb about `un_partial_stated_event_stream` (#15108) * Sort BOOLEAN_COLUMNS and APPEND_ONLY_TABLES So I can see if a given table is present in logarithmic time, rather than linear. * Teach portdb about `un_partial_stated_event_streams` * Comments comments comments * Changelog --- changelog.d/15108.bugfix | 1 + synapse/_scripts/synapse_port_db.py | 85 +++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 33 deletions(-) create mode 100644 changelog.d/15108.bugfix (limited to 'synapse') diff --git a/changelog.d/15108.bugfix b/changelog.d/15108.bugfix new file mode 100644 index 0000000000..30af8b439d --- /dev/null +++ b/changelog.d/15108.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.75 where the [portdb script](https://matrix-org.github.io/synapse/release-v1.78/postgres.html#porting-from-sqlite) would fail to run after a room had been faster-joined. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 5e137dbbf7..0d35e0af8f 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -94,61 +94,80 @@ reactor = cast(ISynapseReactor, reactor_) logger = logging.getLogger("synapse_port_db") +# SQLite doesn't have a dedicated boolean type (it stores True/False as 1/0). This means +# portdb will read sqlite bools as integers, then try to insert them into postgres +# boolean columns---which fails. Lacking some Python-parseable metaschema, we must +# specify which integer columns should be inserted as booleans into postgres. BOOLEAN_COLUMNS = { - "events": ["processed", "outlier", "contains_url"], - "rooms": ["is_public", "has_auth_chain_index"], + "access_tokens": ["used"], + "account_validity": ["email_sent"], + "device_lists_changes_in_room": ["converted_to_destinations"], + "device_lists_outbound_pokes": ["sent"], + "devices": ["hidden"], + "e2e_fallback_keys_json": ["used"], + "e2e_room_keys": ["is_verified"], "event_edges": ["is_state"], + "events": ["processed", "outlier", "contains_url"], + "local_media_repository": ["safe_from_quarantine"], "presence_list": ["accepted"], "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], - "devices": ["hidden"], - "device_lists_outbound_pokes": ["sent"], - "users_who_share_rooms": ["share_private"], - "e2e_room_keys": ["is_verified"], - "account_validity": ["email_sent"], + "pushers": ["enabled"], "redactions": ["have_censored"], "room_stats_state": ["is_federatable"], - "local_media_repository": ["safe_from_quarantine"], + "rooms": ["is_public", "has_auth_chain_index"], "users": ["shadow_banned", "approved"], - "e2e_fallback_keys_json": ["used"], - "access_tokens": ["used"], - "device_lists_changes_in_room": ["converted_to_destinations"], - "pushers": ["enabled"], + "un_partial_stated_event_stream": ["rejection_status_changed"], + "users_who_share_rooms": ["share_private"], } +# These tables are never deleted from in normal operation [*], so we can resume porting +# over rows from a previous attempt rather than starting from scratch. +# +# [*]: We do delete from many of these tables when purging a room, and +# presumably when purging old events. So we might e.g. +# +# 1. Run portdb and port half of some table. +# 2. Stop portdb. +# 3. Purge something, deleting some of the rows we've ported over. +# 4. Restart portdb. The rows deleted from sqlite are still present in postgres. +# +# But this isn't the end of the world: we should be able to repeat the purge +# on the postgres DB when porting completes. APPEND_ONLY_TABLES = [ + "cache_invalidation_stream_by_instance", + "event_auth", + "event_edges", + "event_json", "event_reference_hashes", + "event_search", + "event_to_state_groups", "events", - "event_json", - "state_events", - "room_memberships", - "topics", - "room_names", - "rooms", + "ex_outlier_stream", "local_media_repository", "local_media_repository_thumbnails", + "presence_stream", + "public_room_list_stream", + "push_rules_stream", + "received_transactions", + "redactions", + "rejections", "remote_media_cache", "remote_media_cache_thumbnails", - "redactions", - "event_edges", - "event_auth", - "received_transactions", + "room_memberships", + "room_names", + "rooms", "sent_transactions", - "transaction_id_to_pdu", - "users", + "state_events", + "state_group_edges", "state_groups", "state_groups_state", - "event_to_state_groups", - "rejections", - "event_search", - "presence_stream", - "push_rules_stream", - "ex_outlier_stream", - "cache_invalidation_stream_by_instance", - "public_room_list_stream", - "state_group_edges", "stream_ordering_to_exterm", + "topics", + "transaction_id_to_pdu", + "un_partial_stated_event_stream", + "users", ] -- cgit 1.5.1 From addd12f16dc35a4f82cb48807719909e7aed9dcb Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 21 Feb 2023 12:26:00 +0000 Subject: Tweak logging for when a worker waits for its view of a replication stream to catch up. (#15120)Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Improve logging messages for the 'wait for repl stream' read-after-write consistency feature * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) * Update synapse/replication/tcp/client.py Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --------- Signed-off-by: Olivier Wilkinson (reivilibre) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/15120.misc | 1 + synapse/replication/tcp/client.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 changelog.d/15120.misc (limited to 'synapse') diff --git a/changelog.d/15120.misc b/changelog.d/15120.misc new file mode 100644 index 0000000000..ebbc0c9027 --- /dev/null +++ b/changelog.d/15120.misc @@ -0,0 +1 @@ +Tweak logging for when a worker waits for its view of a replication stream to catch up. \ No newline at end of file diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index cc0528bd8e..424854efbe 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -370,15 +370,23 @@ class ReplicationDataHandler: # We measure here to get in flight counts and average waiting time. with Measure(self._clock, "repl.wait_for_stream_position"): logger.info( - "Waiting for repl stream %r to reach %s (%s)", + "Waiting for repl stream %r to reach %s (%s); currently at: %s", stream_name, position, instance_name, + current_position, ) try: await make_deferred_yieldable(deferred) except defer.TimeoutError: - logger.error("Timed out waiting for stream %s", stream_name) + logger.error( + "Timed out waiting for repl stream %r to reach %s (%s)" + "; currently at: %s", + stream_name, + position, + instance_name, + self._streams[stream_name].current_token(instance_name), + ) return logger.info( -- cgit 1.5.1 From 647ff3ef65e7a54b2719755802b4e6f2f45f5eb6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 22 Feb 2023 11:07:28 +0000 Subject: Remove unused `room_alias` field from `/createRoom` response (#15093) * Change `create_room` return type * Don't return room alias from /createRoom * Update other callsites * Fix up mypy complaints It looks like new_room_user_id is None iff new_room_id is None. It's a shame we haven't expressed this in a way that mypy can understand. * Changelog --- changelog.d/15093.bugfix | 1 + synapse/handlers/register.py | 4 +-- synapse/handlers/room.py | 38 ++++++++++++------------ synapse/module_api/__init__.py | 6 ++-- synapse/rest/client/room.py | 4 +-- synapse/server_notices/server_notices_manager.py | 3 +- tests/storage/test_cleanup_extrems.py | 8 ++--- tests/storage/test_event_metrics.py | 3 +- tests/storage/test_receipts.py | 10 ++++--- tests/test_federation.py | 2 +- 10 files changed, 40 insertions(+), 39 deletions(-) create mode 100644 changelog.d/15093.bugfix (limited to 'synapse') diff --git a/changelog.d/15093.bugfix b/changelog.d/15093.bugfix new file mode 100644 index 0000000000..00f1c19391 --- /dev/null +++ b/changelog.d/15093.bugfix @@ -0,0 +1 @@ +Remove the unspecced `room_alias` field from the [`/createRoom`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3createroom) response. diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c611efb760..e4e506e62c 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -476,7 +476,7 @@ class RegistrationHandler: # create room expects the localpart of the room alias config["room_alias_name"] = room_alias.localpart - info, _ = await room_creation_handler.create_room( + room_id, _, _ = await room_creation_handler.create_room( fake_requester, config=config, ratelimit=False, @@ -490,7 +490,7 @@ class RegistrationHandler: user_id, authenticated_entity=self._server_name ), target=UserID.from_string(user_id), - room_id=info["room_id"], + room_id=room_id, # Since it was just created, there are no remote hosts. remote_room_hosts=[], action="join", diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 837dabb3b7..37c87c8351 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -690,13 +690,14 @@ class RoomCreationHandler: config: JsonDict, ratelimit: bool = True, creator_join_profile: Optional[JsonDict] = None, - ) -> Tuple[dict, int]: + ) -> Tuple[str, Optional[RoomAlias], int]: """Creates a new room. Args: - requester: - The user who requested the room creation. - config : A dict of configuration options. + requester: The user who requested the room creation. + config: A dict of configuration options. This will be the body of + a /createRoom request; see + https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3createroom ratelimit: set to False to disable the rate limiter creator_join_profile: @@ -707,14 +708,17 @@ class RoomCreationHandler: `avatar_url` and/or `displayname`. Returns: - First, a dict containing the keys `room_id` and, if an alias - was, requested, `room_alias`. Secondly, the stream_id of the - last persisted event. + A 3-tuple containing: + - the room ID; + - if requested, the room alias, otherwise None; and + - the `stream_id` of the last persisted event. Raises: - SynapseError if the room ID couldn't be stored, 3pid invitation config - validation failed, or something went horribly wrong. - ResourceLimitError if server is blocked to some resource being - exceeded + SynapseError: + if the room ID couldn't be stored, 3pid invitation config + validation failed, or something went horribly wrong. + ResourceLimitError: + if server is blocked to some resource being + exceeded """ user_id = requester.user.to_string() @@ -1024,11 +1028,6 @@ class RoomCreationHandler: last_sent_event_id = member_event_id depth += 1 - result = {"room_id": room_id} - - if room_alias: - result["room_alias"] = room_alias.to_string() - # Always wait for room creation to propagate before returning await self._replication.wait_for_stream_position( self.hs.config.worker.events_shard_config.get_instance(room_id), @@ -1036,7 +1035,7 @@ class RoomCreationHandler: last_stream_id, ) - return result, last_stream_id + return room_id, room_alias, last_stream_id async def _send_events_for_new_room( self, @@ -1825,7 +1824,7 @@ class RoomShutdownHandler: new_room_user_id, authenticated_entity=requester_user_id ) - info, stream_id = await self._room_creation_handler.create_room( + new_room_id, _, stream_id = await self._room_creation_handler.create_room( room_creator_requester, config={ "preset": RoomCreationPreset.PUBLIC_CHAT, @@ -1834,7 +1833,6 @@ class RoomShutdownHandler: }, ratelimit=False, ) - new_room_id = info["room_id"] logger.info( "Shutting down room %r, joining to new room: %r", room_id, new_room_id @@ -1887,6 +1885,7 @@ class RoomShutdownHandler: # Join users to new room if new_room_user_id: + assert new_room_id is not None await self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, @@ -1919,6 +1918,7 @@ class RoomShutdownHandler: aliases_for_room = await self.store.get_aliases_for_room(room_id) + assert new_room_id is not None await self.store.update_aliases_for_room( room_id, new_room_id, requester_user_id ) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index d22dd19d38..1964276a54 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -1576,14 +1576,14 @@ class ModuleApi: ) requester = create_requester(user_id) - room_id_and_alias, _ = await self._hs.get_room_creation_handler().create_room( + room_id, room_alias, _ = await self._hs.get_room_creation_handler().create_room( requester=requester, config=config, ratelimit=ratelimit, creator_join_profile=creator_join_profile, ) - - return room_id_and_alias["room_id"], room_id_and_alias.get("room_alias", None) + room_alias_str = room_alias.to_string() if room_alias else None + return room_id, room_alias_str async def set_displayname( self, diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index d0db85cca7..14b04810a1 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -160,11 +160,11 @@ class RoomCreateRestServlet(TransactionRestServlet): async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - info, _ = await self._room_creation_handler.create_room( + room_id, _, _ = await self._room_creation_handler.create_room( requester, self.get_room_config(request) ) - return 200, info + return 200, {"room_id": room_id} def get_room_config(self, request: Request) -> JsonDict: user_supplied_config = parse_json_object_from_request(request) diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 564e3705c2..9732dbdb6e 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -178,7 +178,7 @@ class ServerNoticesManager: "avatar_url": self._config.servernotices.server_notices_mxid_avatar_url, } - info, _ = await self._room_creation_handler.create_room( + room_id, _, _ = await self._room_creation_handler.create_room( requester, config={ "preset": RoomCreationPreset.PRIVATE_CHAT, @@ -188,7 +188,6 @@ class ServerNoticesManager: ratelimit=False, creator_join_profile=join_profile, ) - room_id = info["room_id"] self.maybe_get_notice_room_for_user.invalidate((user_id,)) diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index d570684c99..7de109966d 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -43,8 +43,9 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): # Create a test user and room self.user = UserID("alice", "test") self.requester = create_requester(self.user) - info, _ = self.get_success(self.room_creator.create_room(self.requester, {})) - self.room_id = info["room_id"] + self.room_id, _, _ = self.get_success( + self.room_creator.create_room(self.requester, {}) + ) def run_background_update(self) -> None: """Re run the background update to clean up the extremities.""" @@ -275,10 +276,9 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase): self.user = UserID.from_string(self.register_user("user1", "password")) self.token1 = self.login("user1", "password") self.requester = create_requester(self.user) - info, _ = self.get_success( + self.room_id, _, _ = self.get_success( self.room_creator.create_room(self.requester, {"visibility": "public"}) ) - self.room_id = info["room_id"] self.event_creator = homeserver.get_event_creation_handler() homeserver.config.consent.user_consent_version = self.CONSENT_VERSION diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index a91411168c..6897addbd3 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -33,8 +33,7 @@ class ExtremStatisticsTestCase(HomeserverTestCase): events = [(3, 2), (6, 2), (4, 6)] for event_count, extrems in events: - info, _ = self.get_success(room_creator.create_room(requester, {})) - room_id = info["room_id"] + room_id, _, _ = self.get_success(room_creator.create_room(requester, {})) last_event = None diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py index 12c17f1073..1b52eef23f 100644 --- a/tests/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -50,12 +50,14 @@ class ReceiptTestCase(HomeserverTestCase): self.otherRequester = create_requester(self.otherUser) # Create a test room - info, _ = self.get_success(self.room_creator.create_room(self.ourRequester, {})) - self.room_id1 = info["room_id"] + self.room_id1, _, _ = self.get_success( + self.room_creator.create_room(self.ourRequester, {}) + ) # Create a second test room - info, _ = self.get_success(self.room_creator.create_room(self.ourRequester, {})) - self.room_id2 = info["room_id"] + self.room_id2, _, _ = self.get_success( + self.room_creator.create_room(self.ourRequester, {}) + ) # Join the second user to the first room memberEvent, memberEventContext = self.get_success( diff --git a/tests/test_federation.py b/tests/test_federation.py index 82dfd88b99..46d2f99eac 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -47,7 +47,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase): room_creator.create_room( our_user, room_creator._presets_dict["public_chat"], ratelimit=False ) - )[0]["room_id"] + )[0] self.store = self.hs.get_datastores().main -- cgit 1.5.1 From 6def779a1a7c49cd10e635986fbfa1e422eb20bf Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 22 Feb 2023 20:29:39 +0100 Subject: Use `json.dump` in `FileExfiltrationWriter` (#15095) To directly write to the open file, instead of writing to an in-memory string first. --- changelog.d/15095.misc | 1 + synapse/app/admin_cmd.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) create mode 100644 changelog.d/15095.misc (limited to 'synapse') diff --git a/changelog.d/15095.misc b/changelog.d/15095.misc new file mode 100644 index 0000000000..a2fafe2fff --- /dev/null +++ b/changelog.d/15095.misc @@ -0,0 +1 @@ +Refactor writing json data in `FileExfiltrationWriter`. \ No newline at end of file diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index ad51f33165..5003777f0d 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -149,7 +149,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(events_file, "a") as f: for event in events: - print(json.dumps(event.get_pdu_json()), file=f) + json.dump(event.get_pdu_json(), fp=f) def write_state( self, room_id: str, event_id: str, state: StateMap[EventBase] @@ -162,7 +162,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(event_file, "a") as f: for event in state.values(): - print(json.dumps(event.get_pdu_json()), file=f) + json.dump(event.get_pdu_json(), fp=f) def write_invite( self, room_id: str, event: EventBase, state: StateMap[EventBase] @@ -178,7 +178,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(invite_state, "a") as f: for event in state.values(): - print(json.dumps(event), file=f) + json.dump(event, fp=f) def write_knock( self, room_id: str, event: EventBase, state: StateMap[EventBase] @@ -194,7 +194,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(knock_state, "a") as f: for event in state.values(): - print(json.dumps(event), file=f) + json.dump(event, fp=f) def write_profile(self, profile: JsonDict) -> None: user_directory = os.path.join(self.base_directory, "user_data") @@ -202,7 +202,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): profile_file = os.path.join(user_directory, "profile") with open(profile_file, "a") as f: - print(json.dumps(profile), file=f) + json.dump(profile, fp=f) def write_devices(self, devices: List[JsonDict]) -> None: user_directory = os.path.join(self.base_directory, "user_data") @@ -211,7 +211,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): for device in devices: with open(device_file, "a") as f: - print(json.dumps(device), file=f) + json.dump(device, fp=f) def write_connections(self, connections: List[JsonDict]) -> None: user_directory = os.path.join(self.base_directory, "user_data") @@ -220,7 +220,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): for connection in connections: with open(connection_file, "a") as f: - print(json.dumps(connection), file=f) + json.dump(connection, fp=f) def write_account_data( self, file_name: str, account_data: Mapping[str, JsonDict] @@ -233,7 +233,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): account_data_file = os.path.join(account_data_directory, file_name) with open(account_data_file, "a") as f: - print(json.dumps(account_data), file=f) + json.dump(account_data, fp=f) def finished(self) -> str: return self.base_directory -- cgit 1.5.1 From 4ed08ff72ef8f1abf85ab22de1e51b570f67b27e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 22 Feb 2023 14:37:18 -0500 Subject: Tighten the default rate limit of creating new devices. (#15135) --- changelog.d/15135.misc | 1 + docs/usage/configuration/config_documentation.md | 6 +++--- synapse/config/ratelimiting.py | 13 +++++++++++-- 3 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 changelog.d/15135.misc (limited to 'synapse') diff --git a/changelog.d/15135.misc b/changelog.d/15135.misc new file mode 100644 index 0000000000..25c4dbffe1 --- /dev/null +++ b/changelog.d/15135.misc @@ -0,0 +1 @@ +Tighten the login ratelimit defaults. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 58c6955689..ab1f9f4963 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1518,11 +1518,11 @@ rc_registration_token_validity: This option specifies several limits for login: * `address` ratelimits login requests based on the client's IP - address. Defaults to `per_second: 0.17`, `burst_count: 3`. + address. Defaults to `per_second: 0.003`, `burst_count: 5`. * `account` ratelimits login requests based on the account the - client is attempting to log into. Defaults to `per_second: 0.17`, - `burst_count: 3`. + client is attempting to log into. Defaults to `per_second: 0.03`, + `burst_count: 5`. * `failed_attempts` ratelimits login requests based on the account the client is attempting to log into, based on the amount of failed login diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 5c13fe428a..b733fac617 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -87,9 +87,18 @@ class RatelimitConfig(Config): defaults={"per_second": 0.1, "burst_count": 5}, ) + # It is reasonable to login with a bunch of devices at once (i.e. when + # setting up an account), but it is *not* valid to continually be + # logging into new devices. rc_login_config = config.get("rc_login", {}) - self.rc_login_address = RatelimitSettings(rc_login_config.get("address", {})) - self.rc_login_account = RatelimitSettings(rc_login_config.get("account", {})) + self.rc_login_address = RatelimitSettings( + rc_login_config.get("address", {}), + defaults={"per_second": 0.003, "burst_count": 5}, + ) + self.rc_login_account = RatelimitSettings( + rc_login_config.get("account", {}), + defaults={"per_second": 0.003, "burst_count": 5}, + ) self.rc_login_failed_attempts = RatelimitSettings( rc_login_config.get("failed_attempts", {}) ) -- cgit 1.5.1 From 9bb2eac71962970d02842bca441f4bcdbbf93a11 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:29:09 -0500 Subject: Bump black from 22.12.0 to 23.1.0 (#15103) --- changelog.d/15103.misc | 1 + poetry.lock | 42 ++++++++++++++-------- stubs/sortedcontainers/sortedlist.pyi | 1 - synapse/_scripts/register_new_matrix_user.py | 2 -- synapse/_scripts/synapse_port_db.py | 1 - synapse/_scripts/synctl.py | 1 - synapse/app/_base.py | 2 +- synapse/app/complement_fork_starter.py | 2 +- synapse/app/generic_worker.py | 1 - synapse/app/homeserver.py | 1 - synapse/config/consent.py | 1 - synapse/config/database.py | 1 - synapse/config/homeserver.py | 1 - synapse/config/ratelimiting.py | 1 - synapse/config/repository.py | 1 - synapse/config/server.py | 1 - synapse/config/tls.py | 1 - synapse/crypto/keyring.py | 2 +- synapse/events/third_party_rules.py | 2 -- synapse/federation/send_queue.py | 4 +-- synapse/handlers/appservice.py | 2 +- synapse/handlers/auth.py | 2 -- synapse/handlers/directory.py | 8 +++-- synapse/handlers/e2e_room_keys.py | 1 - synapse/handlers/event_auth.py | 1 - synapse/handlers/initial_sync.py | 1 - synapse/handlers/presence.py | 2 -- synapse/handlers/room.py | 8 +++-- synapse/handlers/room_batch.py | 2 +- synapse/handlers/sync.py | 1 - synapse/logging/opentracing.py | 1 + synapse/metrics/__init__.py | 1 - synapse/metrics/_gc.py | 1 - synapse/push/bulk_push_rule_evaluator.py | 1 - synapse/replication/http/account_data.py | 1 - synapse/replication/http/devices.py | 1 - synapse/replication/tcp/redis.py | 1 - synapse/replication/tcp/streams/events.py | 1 - synapse/rest/admin/rooms.py | 4 --- synapse/rest/admin/users.py | 8 +++-- synapse/rest/client/auth.py | 1 - synapse/rest/client/filter.py | 1 - synapse/rest/client/register.py | 18 ++++++---- synapse/rest/media/v1/_base.py | 1 - synapse/rest/media/v1/thumbnailer.py | 1 - synapse/storage/databases/main/deviceinbox.py | 5 ++- synapse/storage/databases/main/devices.py | 4 +-- synapse/storage/databases/main/e2e_room_keys.py | 2 +- synapse/storage/databases/main/end_to_end_keys.py | 8 ++--- synapse/storage/databases/main/event_federation.py | 1 - synapse/storage/databases/main/events.py | 1 - .../storage/databases/main/events_bg_updates.py | 4 +-- synapse/storage/databases/main/events_worker.py | 2 +- synapse/storage/databases/main/media_repository.py | 1 - synapse/storage/databases/main/pusher.py | 3 -- synapse/storage/databases/main/receipts.py | 1 - synapse/storage/databases/main/room.py | 1 - synapse/storage/databases/main/search.py | 2 -- synapse/storage/databases/main/state.py | 1 - synapse/storage/databases/main/stats.py | 2 +- synapse/storage/databases/main/stream.py | 1 + synapse/storage/databases/main/transactions.py | 1 - synapse/storage/databases/main/user_directory.py | 1 - synapse/storage/databases/state/bg_updates.py | 1 - synapse/storage/databases/state/store.py | 7 ++-- synapse/storage/prepare_database.py | 4 +-- synapse/types/state.py | 2 +- synapse/util/caches/__init__.py | 1 - synapse/util/check_dependencies.py | 2 +- synapse/util/patch_inline_callbacks.py | 1 - synmark/__main__.py | 2 -- synmark/suites/logging.py | 1 - tests/federation/test_complexity.py | 4 --- tests/federation/test_federation_server.py | 1 - tests/handlers/test_sso.py | 1 - tests/handlers/test_stats.py | 1 - tests/http/federation/test_srv_resolver.py | 1 - tests/http/test_client.py | 2 +- tests/push/test_bulk_push_rule_evaluator.py | 1 - tests/push/test_email.py | 2 -- tests/replication/slave/storage/test_events.py | 1 - tests/rest/admin/test_device.py | 3 -- tests/rest/admin/test_media.py | 5 --- tests/rest/admin/test_room.py | 1 - tests/rest/admin/test_server_notice.py | 1 - tests/rest/client/test_account.py | 4 --- tests/rest/client/test_auth.py | 2 -- tests/rest/client/test_capabilities.py | 1 - tests/rest/client/test_consent.py | 1 - tests/rest/client/test_directory.py | 1 - tests/rest/client/test_ephemeral_message.py | 1 - tests/rest/client/test_events.py | 3 -- tests/rest/client/test_filter.py | 1 - tests/rest/client/test_login.py | 2 -- tests/rest/client/test_login_token_request.py | 1 - tests/rest/client/test_presence.py | 1 - tests/rest/client/test_profile.py | 3 -- tests/rest/client/test_register.py | 4 --- tests/rest/client/test_rendezvous.py | 1 - tests/rest/client/test_rooms.py | 14 ++------ tests/rest/client/test_sync.py | 3 -- tests/rest/client/test_third_party_rules.py | 3 ++ tests/rest/media/test_media_retention.py | 1 - tests/rest/media/v1/test_media_storage.py | 3 -- tests/rest/media/v1/test_url_preview.py | 3 -- tests/server_notices/test_consent.py | 2 -- tests/storage/databases/main/test_deviceinbox.py | 1 - tests/storage/databases/main/test_receipts.py | 2 +- tests/storage/databases/main/test_room.py | 1 - tests/storage/test_client_ips.py | 1 - tests/storage/test_event_chain.py | 2 -- tests/storage/test_event_federation.py | 2 +- tests/storage/test_event_push_actions.py | 2 +- tests/storage/test_purge.py | 1 - tests/storage/test_roommember.py | 3 -- tests/storage/test_state.py | 30 ++++++++-------- tests/test_mau.py | 1 - 117 files changed, 108 insertions(+), 218 deletions(-) create mode 100644 changelog.d/15103.misc (limited to 'synapse') diff --git a/changelog.d/15103.misc b/changelog.d/15103.misc new file mode 100644 index 0000000000..65322498c9 --- /dev/null +++ b/changelog.d/15103.misc @@ -0,0 +1 @@ +Bump black from 22.12.0 to 23.1.0. diff --git a/poetry.lock b/poetry.lock index 4d724ab782..8ffdab7a22 100644 --- a/poetry.lock +++ b/poetry.lock @@ -90,32 +90,46 @@ typecheck = ["mypy"] [[package]] name = "black" -version = "22.12.0" +version = "23.1.0" description = "The uncompromising code formatter." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"}, - {file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"}, - {file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"}, - {file = "black-22.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4"}, - {file = "black-22.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2"}, - {file = "black-22.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350"}, - {file = "black-22.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d"}, - {file = "black-22.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc"}, - {file = "black-22.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320"}, - {file = "black-22.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148"}, - {file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"}, - {file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"}, + {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"}, + {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"}, + {file = "black-23.1.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b"}, + {file = "black-23.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"}, + {file = "black-23.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958"}, + {file = "black-23.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a"}, + {file = "black-23.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481"}, + {file = "black-23.1.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad"}, + {file = "black-23.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8"}, + {file = "black-23.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580"}, + {file = "black-23.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468"}, + {file = "black-23.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739"}, + {file = "black-23.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9"}, + {file = "black-23.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555"}, + {file = "black-23.1.0-py3-none-any.whl", hash = "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32"}, + {file = "black-23.1.0.tar.gz", hash = "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac"}, ] [package.dependencies] click = ">=8.0.0" mypy-extensions = ">=0.4.3" +packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} diff --git a/stubs/sortedcontainers/sortedlist.pyi b/stubs/sortedcontainers/sortedlist.pyi index 1fe1a136f1..0e745c0a79 100644 --- a/stubs/sortedcontainers/sortedlist.pyi +++ b/stubs/sortedcontainers/sortedlist.pyi @@ -29,7 +29,6 @@ _Repr = Callable[[], str] def recursive_repr(fillvalue: str = ...) -> Callable[[_Repr], _Repr]: ... class SortedList(MutableSequence[_T]): - DEFAULT_LOAD_FACTOR: int = ... def __init__( self, diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 2b74a40166..19ca399d44 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -47,7 +47,6 @@ def request_registration( _print: Callable[[str], None] = print, exit: Callable[[int], None] = sys.exit, ) -> None: - url = "%s/_synapse/admin/v1/register" % (server_location.rstrip("/"),) # Get the nonce @@ -154,7 +153,6 @@ def register_new_user( def main() -> None: - logging.captureWarnings(True) parser = argparse.ArgumentParser( diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 0d35e0af8f..2c9cbf8b27 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -1205,7 +1205,6 @@ class CursesProgress(Progress): if self.finished: status = "Time spent: %s (Done!)" % (duration_str,) else: - if self.total_processed > 0: left = float(self.total_remaining) / self.total_processed diff --git a/synapse/_scripts/synctl.py b/synapse/_scripts/synctl.py index b4c96ad7f3..077b90935e 100755 --- a/synapse/_scripts/synctl.py +++ b/synapse/_scripts/synctl.py @@ -167,7 +167,6 @@ Worker = collections.namedtuple( def main() -> None: - parser = argparse.ArgumentParser() parser.add_argument( diff --git a/synapse/app/_base.py b/synapse/app/_base.py index a5aa2185a2..28062dd69d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -213,7 +213,7 @@ def handle_startup_exception(e: Exception) -> NoReturn: def redirect_stdio_to_logs() -> None: streams = [("stdout", LogLevel.info), ("stderr", LogLevel.error)] - for (stream, level) in streams: + for stream, level in streams: oldStream = getattr(sys, stream) loggingFile = LoggingFile( logger=twisted.logger.Logger(namespace=stream), diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py index 920538f44d..c8dc3f9d76 100644 --- a/synapse/app/complement_fork_starter.py +++ b/synapse/app/complement_fork_starter.py @@ -219,7 +219,7 @@ def main() -> None: # memory space and don't need to repeat the work of loading the code! # Instead of using fork() directly, we use the multiprocessing library, # which uses fork() on Unix platforms. - for (func, worker_args) in zip(worker_functions, args_by_worker): + for func, worker_args in zip(worker_functions, args_by_worker): process = multiprocessing.Process( target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) ) diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 946f3a3807..0dec24369a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -157,7 +157,6 @@ class GenericWorkerServer(HomeServer): DATASTORE_CLASS = GenericWorkerSlavedStore # type: ignore def _listen_http(self, listener_config: ListenerConfig) -> None: - assert listener_config.http_options is not None # We always include a health resource. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 6176a70eb2..b8830b1a9c 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -321,7 +321,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer: and not config.registration.registrations_require_3pid and not config.registration.registration_requires_token ): - raise ConfigError( "You have enabled open registration without any verification. This is a known vector for " "spam and abuse. If you would like to allow public registration, please consider adding email, " diff --git a/synapse/config/consent.py b/synapse/config/consent.py index be74609dc4..5bfd0cbb71 100644 --- a/synapse/config/consent.py +++ b/synapse/config/consent.py @@ -22,7 +22,6 @@ from ._base import Config class ConsentConfig(Config): - section = "consent" def __init__(self, *args: Any): diff --git a/synapse/config/database.py b/synapse/config/database.py index 928fec8dfe..596d8769fe 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -154,7 +154,6 @@ class DatabaseConfig(Config): logger.warning(NON_SQLITE_DATABASE_PATH_WARNING) def set_databasepath(self, database_path: str) -> None: - if database_path != ":memory:": database_path = self.abspath(database_path) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 4d2b298a70..c205a78039 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -56,7 +56,6 @@ from .workers import WorkerConfig class HomeServerConfig(RootConfig): - config_classes = [ ModulesConfig, ServerConfig, diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index b733fac617..a5514e70a2 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -46,7 +46,6 @@ class RatelimitConfig(Config): section = "ratelimiting" def read_config(self, config: JsonDict, **kwargs: Any) -> None: - # Load the new-style messages config if it exists. Otherwise fall back # to the old method. if "rc_message" in config: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index e4759711ed..2da40c09f0 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -116,7 +116,6 @@ class ContentRepositoryConfig(Config): section = "media" def read_config(self, config: JsonDict, **kwargs: Any) -> None: - # Only enable the media repo if either the media repo is enabled or the # current worker app is the media repo. if ( diff --git a/synapse/config/server.py b/synapse/config/server.py index d4ef9930b0..0e46b849cf 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -735,7 +735,6 @@ class ServerConfig(Config): listeners: Optional[List[dict]], **kwargs: Any, ) -> str: - _, bind_port = parse_and_validate_server_name(server_name) if bind_port is not None: unsecure_port = bind_port - 400 diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 336fe3e0da..318270ebb8 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -30,7 +30,6 @@ class TlsConfig(Config): section = "tls" def read_config(self, config: JsonDict, **kwargs: Any) -> None: - self.tls_certificate_file = self.abspath(config.get("tls_certificate_path")) self.tls_private_key_file = self.abspath(config.get("tls_private_key_path")) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 86cd4af9bd..d710607c63 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -399,7 +399,7 @@ class Keyring: # We now convert the returned list of results into a map from server # name to key ID to FetchKeyResult, to return. to_return: Dict[str, Dict[str, FetchKeyResult]] = {} - for (request, results) in zip(deduped_requests, results_per_request): + for request, results in zip(deduped_requests, results_per_request): to_return_by_server = to_return.setdefault(request.server_name, {}) for key_id, key_result in results.items(): existing = to_return_by_server.get(key_id) diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 97c61cc258..9a25ed419b 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -78,7 +78,6 @@ def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: # correctly, we need to await its result. Therefore it doesn't make a lot of # sense to make it go through the run() wrapper. if f.__name__ == "check_event_allowed": - # We need to wrap check_event_allowed because its old form would return either # a boolean or a dict, but now we want to return the dict separately from the # boolean. @@ -100,7 +99,6 @@ def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: return wrap_check_event_allowed if f.__name__ == "on_create_room": - # We need to wrap on_create_room because its old form would return a boolean # if the room creation is denied, but now we just want it to raise an # exception. diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index d720b5fd3f..3063df7990 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -314,7 +314,7 @@ class FederationRemoteSendQueue(AbstractFederationSender): # stream position. keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} - for ((destination, edu_key), pos) in keyed_edus.items(): + for (destination, edu_key), pos in keyed_edus.items(): rows.append( ( pos, @@ -329,7 +329,7 @@ class FederationRemoteSendQueue(AbstractFederationSender): j = self.edus.bisect_right(to_token) + 1 edus = self.edus.items()[i:j] - for (pos, edu) in edus: + for pos, edu in edus: rows.append((pos, EduRow(edu))) # Sort rows based on pos diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 5d1d21cdc8..ec3ab968e9 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -737,7 +737,7 @@ class ApplicationServicesHandler: ) ret = [] - for (success, result) in results: + for success, result in results: if success: ret.extend(result) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index cf12b55d21..b12bc4c9a3 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -815,7 +815,6 @@ class AuthHandler: now_ms = self._clock.time_msec() if existing_token.expiry_ts is not None and existing_token.expiry_ts < now_ms: - raise SynapseError( HTTPStatus.FORBIDDEN, "The supplied refresh token has expired", @@ -2259,7 +2258,6 @@ class PasswordAuthProvider: async def on_logged_out( self, user_id: str, device_id: Optional[str], access_token: str ) -> None: - # call all of the on_logged_out callbacks for callback in self.on_logged_out_callbacks: try: diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index a5798e9483..1fb23cc9bf 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -497,9 +497,11 @@ class DirectoryHandler: raise SynapseError(403, "Not allowed to publish room") # Check if publishing is blocked by a third party module - allowed_by_third_party_rules = await ( - self.third_party_event_rules.check_visibility_can_be_modified( - room_id, visibility + allowed_by_third_party_rules = ( + await ( + self.third_party_event_rules.check_visibility_can_be_modified( + room_id, visibility + ) ) ) if not allowed_by_third_party_rules: diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 83f53ceb88..50317ec753 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -188,7 +188,6 @@ class E2eRoomKeysHandler: # XXX: perhaps we should use a finer grained lock here? async with self._upload_linearizer.queue(user_id): - # Check that the version we're trying to upload is the current version try: version_info = await self.store.get_e2e_room_keys_version_info(user_id) diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index 46dd63c3f0..c508861b6a 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -236,7 +236,6 @@ class EventAuthHandler: # in any of them. allowed_rooms = await self.get_rooms_that_allow_join(state_ids) if not await self.is_user_in_rooms(allowed_rooms, user_id): - # If this is a remote request, the user might be in an allowed room # that we do not know about. if get_domain_from_id(user_id) != self._server_name: diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 1a29abde98..aead0b44b9 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -124,7 +124,6 @@ class InitialSyncHandler: as_client_event: bool = True, include_archived: bool = False, ) -> JsonDict: - memberships = [Membership.INVITE, Membership.JOIN] if include_archived: memberships.append(Membership.LEAVE) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 87af31aa27..4ad2233573 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -777,7 +777,6 @@ class PresenceHandler(BasePresenceHandler): ) if self.unpersisted_users_changes: - await self.store.update_presence( [ self.user_to_current_state[user_id] @@ -823,7 +822,6 @@ class PresenceHandler(BasePresenceHandler): now = self.clock.time_msec() with Measure(self.clock, "presence_update_states"): - # NOTE: We purposefully don't await between now and when we've # calculated what we want to do with the new states, to avoid races. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 37c87c8351..a26ec02284 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -868,9 +868,11 @@ class RoomCreationHandler: ) # Check whether this visibility value is blocked by a third party module - allowed_by_third_party_rules = await ( - self.third_party_event_rules.check_visibility_can_be_modified( - room_id, visibility + allowed_by_third_party_rules = ( + await ( + self.third_party_event_rules.check_visibility_can_be_modified( + room_id, visibility + ) ) ) if not allowed_by_third_party_rules: diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index c73d2adaad..5d4ca0e2d2 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -374,7 +374,7 @@ class RoomBatchHandler: # correct stream_ordering as they are backfilled (which decrements). # Events are sorted by (topological_ordering, stream_ordering) # where topological_ordering is just depth. - for (event, context) in reversed(events_to_persist): + for event, context in reversed(events_to_persist): # This call can't raise `PartialStateConflictError` since we forbid # use of the historical batch API during partial state await self.event_creation_handler.handle_new_client_event( diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4e4595312c..fd6d946c37 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1297,7 +1297,6 @@ class SyncHandler: return RoomNotifCounts.empty() with Measure(self.clock, "unread_notifs_for_room_id"): - return await self.store.get_unread_event_push_actions_by_room_for_user( room_id, sync_config.user.to_string(), diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 5aed71262f..c70eee649c 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -524,6 +524,7 @@ def whitelisted_homeserver(destination: str) -> bool: # Start spans and scopes + # Could use kwargs but I want these to be explicit def start_active_span( operation_name: str, diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index b01372565d..8ce5887229 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -87,7 +87,6 @@ class LaterGauge(Collector): ] def collect(self) -> Iterable[Metric]: - g = GaugeMetricFamily(self.name, self.desc, labels=self.labels) try: diff --git a/synapse/metrics/_gc.py b/synapse/metrics/_gc.py index b7d47ce3e7..a22c4e5bbd 100644 --- a/synapse/metrics/_gc.py +++ b/synapse/metrics/_gc.py @@ -139,7 +139,6 @@ def install_gc_manager() -> None: class PyPyGCStats(Collector): def collect(self) -> Iterable[Metric]: - # @stats is a pretty-printer object with __str__() returning a nice table, # plus some fields that contain data from that table. # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB'). diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 5fc38431ba..8f834be774 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -330,7 +330,6 @@ class BulkPushRuleEvaluator: context: EventContext, event_id_to_event: Mapping[str, EventBase], ) -> None: - if ( not event.internal_metadata.is_notifiable() or event.internal_metadata.is_historical() diff --git a/synapse/replication/http/account_data.py b/synapse/replication/http/account_data.py index 2374f810c9..111ec07e64 100644 --- a/synapse/replication/http/account_data.py +++ b/synapse/replication/http/account_data.py @@ -265,7 +265,6 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint): @staticmethod async def _serialize_payload(user_id: str, room_id: str, tag: str) -> JsonDict: # type: ignore[override] - return {} async def _handle_request( # type: ignore[override] diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py index ecea6fc915..cc3929dcf5 100644 --- a/synapse/replication/http/devices.py +++ b/synapse/replication/http/devices.py @@ -195,7 +195,6 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint): async def _serialize_payload( # type: ignore[override] user_id: str, device_id: str, keys: JsonDict ) -> JsonDict: - return { "user_id": user_id, "device_id": device_id, diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index fd1c0ec6af..dfc061eb5e 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -328,7 +328,6 @@ class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory): outbound_redis_connection: txredisapi.ConnectionHandler, channel_names: List[str], ): - super().__init__( hs, uuid="subscriber", diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 14b6705862..ad9b760713 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -139,7 +139,6 @@ class EventsStream(Stream): current_token: Token, target_row_count: int, ) -> StreamUpdateResult: - # the events stream merges together three separate sources: # * new events # * current_state changes diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 1d6e4982d7..4de56bf13f 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -75,7 +75,6 @@ class RoomRestV2Servlet(RestServlet): async def on_DELETE( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - requester = await self._auth.get_user_by_req(request) await assert_user_is_admin(self._auth, requester) @@ -144,7 +143,6 @@ class DeleteRoomStatusByRoomIdRestServlet(RestServlet): async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - await assert_requester_is_admin(self._auth, request) if not RoomID.is_valid(room_id): @@ -181,7 +179,6 @@ class DeleteRoomStatusByDeleteIdRestServlet(RestServlet): async def on_GET( self, request: SynapseRequest, delete_id: str ) -> Tuple[int, JsonDict]: - await assert_requester_is_admin(self._auth, request) delete_status = self._pagination_handler.get_delete_status(delete_id) @@ -438,7 +435,6 @@ class RoomStateRestServlet(RestServlet): class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet): - PATTERNS = admin_patterns("/join/(?P[^/]*)$") def __init__(self, hs: "HomeServer"): diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 0c0bf540b9..7cc4db20d6 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -683,8 +683,12 @@ class AccountValidityRenewServlet(RestServlet): await assert_requester_is_admin(self.auth, request) if self.account_activity_handler.on_legacy_admin_request_callback: - expiration_ts = await ( - self.account_activity_handler.on_legacy_admin_request_callback(request) + expiration_ts = ( + await ( + self.account_activity_handler.on_legacy_admin_request_callback( + request + ) + ) ) else: body = parse_json_object_from_request(request) diff --git a/synapse/rest/client/auth.py b/synapse/rest/client/auth.py index eb77337044..276a1b405d 100644 --- a/synapse/rest/client/auth.py +++ b/synapse/rest/client/auth.py @@ -97,7 +97,6 @@ class AuthRestServlet(RestServlet): return None async def on_POST(self, request: Request, stagetype: str) -> None: - session = parse_string(request, "session") if not session: raise SynapseError(400, "No session supplied") diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py index cc1c2f9731..236199897c 100644 --- a/synapse/rest/client/filter.py +++ b/synapse/rest/client/filter.py @@ -79,7 +79,6 @@ class CreateFilterRestServlet(RestServlet): async def on_POST( self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: - target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 3cb1e7e375..bce806f2bb 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -628,10 +628,12 @@ class RegisterRestServlet(RestServlet): if not password_hash: raise SynapseError(400, "Missing params: password", Codes.MISSING_PARAM) - desired_username = await ( - self.password_auth_provider.get_username_for_registration( - auth_result, - params, + desired_username = ( + await ( + self.password_auth_provider.get_username_for_registration( + auth_result, + params, + ) ) ) @@ -682,9 +684,11 @@ class RegisterRestServlet(RestServlet): session_id ) - display_name = await ( - self.password_auth_provider.get_displayname_for_registration( - auth_result, params + display_name = ( + await ( + self.password_auth_provider.get_displayname_for_registration( + auth_result, params + ) ) ) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 6e035afcce..ef8334ae25 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -270,7 +270,6 @@ async def respond_with_responder( logger.debug("Responding to media request with responder %s", responder) add_file_headers(request, media_type, file_size, upload_name) try: - await responder.write_to_consumer(request) except Exception as e: # The majority of the time this will be due to the client having gone diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 9480cc5763..f909a4fb9a 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -38,7 +38,6 @@ class ThumbnailError(Exception): class Thumbnailer: - FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"} @staticmethod diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 8e61aba454..0d75d9739a 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -721,8 +721,8 @@ class DeviceInboxWorkerStore(SQLBaseStore): ], ) - for (user_id, messages_by_device) in edu["messages"].items(): - for (device_id, msg) in messages_by_device.items(): + for user_id, messages_by_device in edu["messages"].items(): + for device_id, msg in messages_by_device.items(): with start_active_span("store_outgoing_to_device_message"): set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["sender"]) set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["message_id"]) @@ -959,7 +959,6 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): def _remove_dead_devices_from_device_inbox_txn( txn: LoggingTransaction, ) -> Tuple[int, bool]: - if "max_stream_id" in progress: max_stream_id = progress["max_stream_id"] else: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 1ca66d57d4..0dd15f16ff 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -512,7 +512,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): results.append(("org.matrix.signing_key_update", result)) if issue_8631_logger.isEnabledFor(logging.DEBUG): - for (user_id, edu) in results: + for user_id, edu in results: issue_8631_logger.debug( "device update to %s for %s from %s to %s: %s", destination, @@ -1316,7 +1316,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) """ count = 0 - for (destination, user_id, stream_id, device_id) in rows: + for destination, user_id, stream_id, device_id in rows: txn.execute( delete_sql, (destination, user_id, stream_id, stream_id, device_id) ) diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index 6240f9a75e..9f8d2e4bea 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -108,7 +108,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): raise StoreError(404, "No backup with that version exists") values = [] - for (room_id, session_id, room_key) in room_keys: + for room_id, session_id, room_key in room_keys: values.append( ( user_id, diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 2c2d145666..b9c39b1718 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -268,7 +268,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) # add each cross-signing signature to the correct device in the result dict. - for (user_id, key_id, device_id, signature) in cross_sigs_result: + for user_id, key_id, device_id, signature in cross_sigs_result: target_device_result = result[user_id][device_id] # We've only looked up cross-signatures for non-deleted devices with key # data. @@ -311,7 +311,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker # devices. user_list = [] user_device_list = [] - for (user_id, device_id) in query_list: + for user_id, device_id in query_list: if device_id is None: user_list.append(user_id) else: @@ -353,7 +353,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker txn.execute(sql, query_params) - for (user_id, device_id, display_name, key_json) in txn: + for user_id, device_id, display_name, key_json in txn: assert device_id is not None if include_deleted_devices: deleted_devices.remove((user_id, device_id)) @@ -382,7 +382,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker signature_query_clauses = [] signature_query_params = [] - for (user_id, device_id) in device_query: + for user_id, device_id in device_query: signature_query_clauses.append( "target_user_id = ? AND target_device_id = ? AND user_id = ?" ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ca780cca36..ff3edeb716 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1612,7 +1612,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas latest_events: List[str], limit: int, ) -> List[str]: - seen_events = set(earliest_events) front = set(latest_events) - seen_events event_results: List[str] = [] diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 7996cbb557..73b8aea16c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -469,7 +469,6 @@ class PersistEventsStore: txn: LoggingTransaction, events: List[EventBase], ) -> None: - # We only care about state events, so this if there are no state events. if not any(e.is_state() for e in events): return diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 584536111d..0a275e6ce6 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -709,7 +709,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): nbrows = 0 last_row_event_id = "" - for (event_id, event_json_raw) in results: + for event_id, event_json_raw in results: try: event_json = db_to_json(event_json_raw) @@ -1167,7 +1167,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): results = list(txn) # (event_id, parent_id, rel_type) for each relation relations_to_insert: List[Tuple[str, str, str]] = [] - for (event_id, event_json_raw) in results: + for event_id, event_json_raw in results: try: event_json = db_to_json(event_json_raw) except Exception as e: diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 6d0ef10258..b7e7498125 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1493,7 +1493,7 @@ class EventsWorkerStore(SQLBaseStore): txn.execute(redactions_sql + clause, args) - for (redacter, redacted) in txn: + for redacter, redacted in txn: d = event_dict.get(redacted) if d: d.redactions.append(redacter) diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index b202c5eb87..fa8be214ce 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -196,7 +196,6 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): def get_local_media_by_user_paginate_txn( txn: LoggingTransaction, ) -> Tuple[List[Dict[str, Any]], int]: - # Set ordering order_by_column = MediaSortOrder(order_by).value diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index df53e726e6..fddbc07afa 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -344,7 +344,6 @@ class PusherWorkerStore(SQLBaseStore): last_user = progress.get("last_user", "") def _delete_pushers(txn: LoggingTransaction) -> int: - sql = """ SELECT name FROM users WHERE deactivated = ? and name > ? @@ -392,7 +391,6 @@ class PusherWorkerStore(SQLBaseStore): last_pusher = progress.get("last_pusher", 0) def _delete_pushers(txn: LoggingTransaction) -> int: - sql = """ SELECT p.id, access_token FROM pushers AS p LEFT JOIN access_tokens AS a ON (p.access_token = a.id) @@ -449,7 +447,6 @@ class PusherWorkerStore(SQLBaseStore): last_pusher = progress.get("last_pusher", 0) def _delete_pushers(txn: LoggingTransaction) -> int: - sql = """ SELECT p.id, p.user_name, p.app_id, p.pushkey FROM pushers AS p diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index dddf49c2d5..92a82240ab 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -887,7 +887,6 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore): def _populate_receipt_event_stream_ordering_txn( txn: LoggingTransaction, ) -> bool: - if "max_stream_id" in progress: max_stream_id = progress["max_stream_id"] else: diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 644bbb8878..39f89291b2 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -2168,7 +2168,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): def _get_event_report_txn( txn: LoggingTransaction, report_id: int ) -> Optional[Dict[str, Any]]: - sql = """ SELECT er.id, diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 3fe433f66c..a7aae661d8 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -122,7 +122,6 @@ class SearchWorkerStore(SQLBaseStore): class SearchBackgroundUpdateStore(SearchWorkerStore): - EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" @@ -615,7 +614,6 @@ class SearchStore(SearchBackgroundUpdateStore): """ count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): - # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin # diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index ba325d390b..ebb2ae964f 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -490,7 +490,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): - CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events" diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index d7b7d0c3c9..d3393d8e49 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -461,7 +461,7 @@ class StatsStore(StateDeltasStore): insert_cols = [] qargs = [] - for (key, val) in chain( + for key, val in chain( keyvalues.items(), absolutes.items(), additive_relatives.items() ): insert_cols.append(key) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 818c46182e..ac5fbf6b86 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -87,6 +87,7 @@ MAX_STREAM_SIZE = 1000 _STREAM_TOKEN = "stream" _TOPOLOGICAL_TOKEN = "topological" + # Used as return values for pagination APIs @attr.s(slots=True, frozen=True, auto_attribs=True) class _EventDictReturn: diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index 6b33d809b6..6d72bd9f67 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -573,7 +573,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): def get_destination_rooms_paginate_txn( txn: LoggingTransaction, ) -> Tuple[List[JsonDict], int]: - if direction == Direction.BACKWARDS: order = "DESC" else: diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 30af4b3b6c..c3f2b61bd5 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -98,7 +98,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): async def _populate_user_directory_createtables( self, progress: JsonDict, batch_size: int ) -> int: - # Get all the rooms that we want to process. def _make_staging_area(txn: LoggingTransaction) -> None: sql = ( diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index d743282f13..097dea5182 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -251,7 +251,6 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): - STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 1a7232b276..89b1faa6c8 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -257,14 +257,11 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): member_filter, non_member_filter = state_filter.get_member_split() # Now we look them up in the member and non-member caches - ( - non_member_state, - incomplete_groups_nm, - ) = self._get_state_for_groups_using_cache( + non_member_state, incomplete_groups_nm = self._get_state_for_groups_using_cache( groups, self._state_group_cache, state_filter=non_member_filter ) - (member_state, incomplete_groups_m,) = self._get_state_for_groups_using_cache( + member_state, incomplete_groups_m = self._get_state_for_groups_using_cache( groups, self._state_group_members_cache, state_filter=member_filter ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 6c335a9315..2a1c6fa31b 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -563,7 +563,7 @@ def _apply_module_schemas( """ # This is the old way for password_auth_provider modules to make changes # to the database. This should instead be done using the module API - for (mod, _config) in config.authproviders.password_providers: + for mod, _config in config.authproviders.password_providers: if not hasattr(mod, "get_db_schema_files"): continue modname = ".".join((mod.__module__, mod.__name__)) @@ -591,7 +591,7 @@ def _apply_module_schema_files( (modname,), ) applied_deltas = {d for d, in cur} - for (name, stream) in names_and_streams: + for name, stream in names_and_streams: if name in applied_deltas: continue diff --git a/synapse/types/state.py b/synapse/types/state.py index 743a4f9217..4b3071acce 100644 --- a/synapse/types/state.py +++ b/synapse/types/state.py @@ -120,7 +120,7 @@ class StateFilter: def to_types(self) -> Iterable[Tuple[str, Optional[str]]]: """The inverse to `from_types`.""" - for (event_type, state_keys) in self.types.items(): + for event_type, state_keys in self.types.items(): if state_keys is None: yield event_type, None else: diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 9387632d0d..6ffa56217e 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -98,7 +98,6 @@ class EvictionReason(Enum): @attr.s(slots=True, auto_attribs=True) class CacheMetric: - _cache: Sized _cache_type: str _cache_name: str diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py index 3b1e205700..1c0fde4966 100644 --- a/synapse/util/check_dependencies.py +++ b/synapse/util/check_dependencies.py @@ -183,7 +183,7 @@ def check_requirements(extra: Optional[str] = None) -> None: deps_unfulfilled = [] errors = [] - for (requirement, must_be_installed) in dependencies: + for requirement, must_be_installed in dependencies: try: dist: metadata.Distribution = metadata.distribution(requirement.name) except metadata.PackageNotFoundError: diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py index f97f98a057..d00d34e652 100644 --- a/synapse/util/patch_inline_callbacks.py +++ b/synapse/util/patch_inline_callbacks.py @@ -211,7 +211,6 @@ def _check_yield_points( result = Failure() if current_context() != expected_context: - # This happens because the context is lost sometime *after* the # previous yield and *after* the current yield. E.g. the # deferred we waited on didn't follow the rules, or we forgot to diff --git a/synmark/__main__.py b/synmark/__main__.py index 35a59e347a..19de639187 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -34,12 +34,10 @@ def make_test(main): """ def _main(loops): - reactor = make_reactor() file_out = StringIO() with redirect_stderr(file_out): - d = Deferred() d.addCallback(lambda _: ensureDeferred(main(reactor, loops))) diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py index 9419892e95..8beb077e0a 100644 --- a/synmark/suites/logging.py +++ b/synmark/suites/logging.py @@ -30,7 +30,6 @@ from synapse.util import Clock class LineCounter(LineOnlyReceiver): - delimiter = b"\n" def __init__(self, *args, **kwargs): diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py index 35dd9a20df..33af8770fd 100644 --- a/tests/federation/test_complexity.py +++ b/tests/federation/test_complexity.py @@ -24,7 +24,6 @@ from tests.test_utils import make_awaitable class RoomComplexityTests(unittest.FederatingHomeserverTestCase): - servlets = [ admin.register_servlets, room.register_servlets, @@ -37,7 +36,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): return config def test_complexity_simple(self) -> None: - u1 = self.register_user("u1", "pass") u1_token = self.login("u1", "pass") @@ -71,7 +69,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): self.assertEqual(complexity, 1.23) def test_join_too_large(self) -> None: - u1 = self.register_user("u1", "pass") handler = self.hs.get_room_member_handler() @@ -131,7 +128,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): self.assertEqual(f.value.errcode, Codes.RESOURCE_LIMIT_EXCEEDED) def test_join_too_large_once_joined(self) -> None: - u1 = self.register_user("u1", "pass") u1_token = self.login("u1", "pass") diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index bba6469b55..6c7738d810 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -34,7 +34,6 @@ from tests.unittest import override_config class FederationServerTests(unittest.FederatingHomeserverTestCase): - servlets = [ admin.register_servlets, room.register_servlets, diff --git a/tests/handlers/test_sso.py b/tests/handlers/test_sso.py index 137deab138..d6f43a98fc 100644 --- a/tests/handlers/test_sso.py +++ b/tests/handlers/test_sso.py @@ -113,7 +113,6 @@ async def mock_get_file( headers: Optional[RawHeaders] = None, is_allowed_content_type: Optional[Callable[[str], bool]] = None, ) -> Tuple[int, Dict[bytes, List[bytes]], str, int]: - fake_response = FakeResponse(code=404) if url == "http://my.server/me.png": fake_response = FakeResponse( diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index f1a50c5bcb..d11ded6c5b 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -31,7 +31,6 @@ EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM = 6 class StatsRoomTests(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, room.register_servlets, diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py index 7748f56ee6..6ab13357f9 100644 --- a/tests/http/federation/test_srv_resolver.py +++ b/tests/http/federation/test_srv_resolver.py @@ -46,7 +46,6 @@ class SrvResolverTestCase(unittest.TestCase): @defer.inlineCallbacks def do_lookup() -> Generator["Deferred[object]", object, List[Server]]: - with LoggingContext("one") as ctx: resolve_d = resolver.resolve_service(service_name) result: List[Server] diff --git a/tests/http/test_client.py b/tests/http/test_client.py index 9cfe1ad0de..f6d6684985 100644 --- a/tests/http/test_client.py +++ b/tests/http/test_client.py @@ -149,7 +149,7 @@ class BlacklistingAgentTest(TestCase): self.allowed_domain, self.allowed_ip = b"allowed.test", b"5.1.1.1" # Configure the reactor's DNS resolver. - for (domain, ip) in ( + for domain, ip in ( (self.safe_domain, self.safe_ip), (self.unsafe_domain, self.unsafe_ip), (self.allowed_domain, self.allowed_ip), diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 199e3d7b70..dce6899e78 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -33,7 +33,6 @@ from tests.unittest import HomeserverTestCase, override_config class TestBulkPushRuleEvaluator(HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, room.register_servlets, diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 7563f33fdc..0a3aca5c50 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -39,7 +39,6 @@ class _User: class EmailPusherTests(HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -48,7 +47,6 @@ class EmailPusherTests(HomeserverTestCase): hijack_auth = False def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["email"] = { "enable_notifs": True, diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index ddca9d696c..57c781a0c3 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -64,7 +64,6 @@ def patch__eq__(cls: object) -> Callable[[], None]: class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase): - STORE_TYPE = EventsWorkerStore def setUp(self) -> None: diff --git a/tests/rest/admin/test_device.py b/tests/rest/admin/test_device.py index 03f2112b07..aaa488bced 100644 --- a/tests/rest/admin/test_device.py +++ b/tests/rest/admin/test_device.py @@ -28,7 +28,6 @@ from tests import unittest class DeviceRestTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, @@ -291,7 +290,6 @@ class DeviceRestTestCase(unittest.HomeserverTestCase): class DevicesRestTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, @@ -415,7 +413,6 @@ class DevicesRestTestCase(unittest.HomeserverTestCase): class DeleteDevicesRestTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index db77a45ae3..f41319a5b6 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -34,7 +34,6 @@ INVALID_TIMESTAMP_IN_S = 1893456000 # 2030-01-01 in seconds class DeleteMediaByIDTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, @@ -196,7 +195,6 @@ class DeleteMediaByIDTestCase(unittest.HomeserverTestCase): class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, @@ -594,7 +592,6 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): class QuarantineMediaByIDTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, @@ -724,7 +721,6 @@ class QuarantineMediaByIDTestCase(unittest.HomeserverTestCase): class ProtectMediaByIDTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, @@ -821,7 +817,6 @@ class ProtectMediaByIDTestCase(unittest.HomeserverTestCase): class PurgeMediaCacheTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 453a6e979c..9dbb778679 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1990,7 +1990,6 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase): class JoinAliasRoomTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, room.register_servlets, diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py index f71ff46d87..28b999573e 100644 --- a/tests/rest/admin/test_server_notice.py +++ b/tests/rest/admin/test_server_notice.py @@ -28,7 +28,6 @@ from tests.unittest import override_config class ServerNoticeTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index e2ee1a1766..2b05dffc7d 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -40,7 +40,6 @@ from tests.unittest import override_config class PasswordResetTestCase(unittest.HomeserverTestCase): - servlets = [ account.register_servlets, synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -408,7 +407,6 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): class DeactivateTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -492,7 +490,6 @@ class DeactivateTestCase(unittest.HomeserverTestCase): class WhoamiTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -567,7 +564,6 @@ class WhoamiTestCase(unittest.HomeserverTestCase): class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): - servlets = [ account.register_servlets, login.register_servlets, diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index a144610078..0d8fe77b88 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -52,7 +52,6 @@ class DummyRecaptchaChecker(UserInteractiveAuthChecker): class FallbackAuthTests(unittest.HomeserverTestCase): - servlets = [ auth.register_servlets, register.register_servlets, @@ -60,7 +59,6 @@ class FallbackAuthTests(unittest.HomeserverTestCase): hijack_auth = False def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["enable_registration_captcha"] = True diff --git a/tests/rest/client/test_capabilities.py b/tests/rest/client/test_capabilities.py index d1751e1557..c16e8d43f4 100644 --- a/tests/rest/client/test_capabilities.py +++ b/tests/rest/client/test_capabilities.py @@ -26,7 +26,6 @@ from tests.unittest import override_config class CapabilitiesTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, capabilities.register_servlets, diff --git a/tests/rest/client/test_consent.py b/tests/rest/client/test_consent.py index b1ca81a911..bb845179d3 100644 --- a/tests/rest/client/test_consent.py +++ b/tests/rest/client/test_consent.py @@ -38,7 +38,6 @@ class ConsentResourceTestCase(unittest.HomeserverTestCase): hijack_auth = False def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["form_secret"] = "123abc" diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index 7a88aa2cda..6490e883bf 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -28,7 +28,6 @@ from tests.unittest import override_config class DirectoryTestCase(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, directory.register_servlets, diff --git a/tests/rest/client/test_ephemeral_message.py b/tests/rest/client/test_ephemeral_message.py index 9fa1f82dfe..f31ebc8021 100644 --- a/tests/rest/client/test_ephemeral_message.py +++ b/tests/rest/client/test_ephemeral_message.py @@ -26,7 +26,6 @@ from tests import unittest class EphemeralMessageTestCase(unittest.HomeserverTestCase): - user_id = "@user:test" servlets = [ diff --git a/tests/rest/client/test_events.py b/tests/rest/client/test_events.py index a9b7db9db2..54df2a252c 100644 --- a/tests/rest/client/test_events.py +++ b/tests/rest/client/test_events.py @@ -38,7 +38,6 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["enable_registration_captcha"] = False config["enable_registration"] = True @@ -51,7 +50,6 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): return hs def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - # register an account self.user_id = self.register_user("sid1", "pass") self.token = self.login(self.user_id, "pass") @@ -142,7 +140,6 @@ class GetEventsTestCase(unittest.HomeserverTestCase): ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - # register an account self.user_id = self.register_user("sid1", "pass") self.token = self.login(self.user_id, "pass") diff --git a/tests/rest/client/test_filter.py b/tests/rest/client/test_filter.py index 830762fd53..91678abf13 100644 --- a/tests/rest/client/test_filter.py +++ b/tests/rest/client/test_filter.py @@ -25,7 +25,6 @@ PATH_PREFIX = "/_matrix/client/v2_alpha" class FilterTestCase(unittest.HomeserverTestCase): - user_id = "@apple:test" hijack_auth = True EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}} diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index ff5baa9f0a..62acf4f44e 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -89,7 +89,6 @@ ADDITIONAL_LOGIN_FLOWS = [ class LoginRestServletTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -737,7 +736,6 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): class CASTestCase(unittest.HomeserverTestCase): - servlets = [ login.register_servlets, ] diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py index 6aedc1a11c..b8187db982 100644 --- a/tests/rest/client/test_login_token_request.py +++ b/tests/rest/client/test_login_token_request.py @@ -26,7 +26,6 @@ endpoint = "/_matrix/client/unstable/org.matrix.msc3882/login/token" class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase): - servlets = [ login.register_servlets, admin.register_servlets, diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py index 67e16880e6..dcbb125a3b 100644 --- a/tests/rest/client/test_presence.py +++ b/tests/rest/client/test_presence.py @@ -35,7 +35,6 @@ class PresenceTestCase(unittest.HomeserverTestCase): servlets = [presence.register_servlets] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - self.presence_handler = Mock(spec=PresenceHandler) self.presence_handler.set_state.return_value = make_awaitable(None) diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py index 8de5a342ae..27c93ad761 100644 --- a/tests/rest/client/test_profile.py +++ b/tests/rest/client/test_profile.py @@ -30,7 +30,6 @@ from tests import unittest class ProfileTestCase(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -324,7 +323,6 @@ class ProfileTestCase(unittest.HomeserverTestCase): class ProfilesRestrictedTestCase(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, login.register_servlets, @@ -404,7 +402,6 @@ class ProfilesRestrictedTestCase(unittest.HomeserverTestCase): class OwnProfileUnrestrictedTestCase(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets_for_client_rest_resource, login.register_servlets, diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index 4c561f9525..b228dba861 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -40,7 +40,6 @@ from tests.unittest import override_config class RegisterRestServletTestCase(unittest.HomeserverTestCase): - servlets = [ login.register_servlets, register.register_servlets, @@ -797,7 +796,6 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): class AccountValidityTestCase(unittest.HomeserverTestCase): - servlets = [ register.register_servlets, synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -913,7 +911,6 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): - servlets = [ register.register_servlets, synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -1132,7 +1129,6 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase): - servlets = [synapse.rest.admin.register_servlets_for_client_rest_resource] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: diff --git a/tests/rest/client/test_rendezvous.py b/tests/rest/client/test_rendezvous.py index c0eb5d01a6..8dbd64be55 100644 --- a/tests/rest/client/test_rendezvous.py +++ b/tests/rest/client/test_rendezvous.py @@ -25,7 +25,6 @@ endpoint = "/_matrix/client/unstable/org.matrix.msc3886/rendezvous" class RendezvousServletTestCase(unittest.HomeserverTestCase): - servlets = [ rendezvous.register_servlets, ] diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index cfad182b2f..4dd763096d 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -65,7 +65,6 @@ class RoomBase(unittest.HomeserverTestCase): servlets = [room.register_servlets, room.register_deprecated_servlets] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - self.hs = self.setup_test_homeserver( "red", federation_http_client=None, @@ -92,7 +91,6 @@ class RoomPermissionsTestCase(RoomBase): rmcreator_id = "@notme:red" def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - self.helper.auth_user_id = self.rmcreator_id # create some rooms under the name rmcreator_id self.uncreated_rmid = "!aa:test" @@ -1127,7 +1125,6 @@ class RoomInviteRatelimitTestCase(RoomBase): class RoomJoinTestCase(RoomBase): - servlets = [ admin.register_servlets, login.register_servlets, @@ -2102,7 +2099,6 @@ class RoomSearchTestCase(unittest.HomeserverTestCase): hijack_auth = False def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - # Register the user who does the searching self.user_id2 = self.register_user("user", "pass") self.access_token = self.login("user", "pass") @@ -2195,7 +2191,6 @@ class RoomSearchTestCase(unittest.HomeserverTestCase): class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -2203,7 +2198,6 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - self.url = b"/_matrix/client/r0/publicRooms" config = self.default_config() @@ -2225,7 +2219,6 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -2233,7 +2226,6 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["allow_public_rooms_without_auth"] = True self.hs = self.setup_test_homeserver(config=config) @@ -2414,7 +2406,6 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -2983,7 +2974,6 @@ class RelationsTestCase(PaginationTestCase): class ContextTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -3359,7 +3349,6 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase): class ThreepidInviteTestCase(unittest.HomeserverTestCase): - servlets = [ admin.register_servlets, login.register_servlets, @@ -3438,7 +3427,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): """ Test allowing/blocking threepid invites with a spam-check module. - In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`.""" + In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`. + """ # Mock a few functions to prevent the test from failing due to failing to talk to # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index b9047194dd..9c876c7a32 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -41,7 +41,6 @@ from tests.server import TimedOutException class FilterTestCase(unittest.HomeserverTestCase): - user_id = "@apple:test" servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -191,7 +190,6 @@ class SyncFilterTestCase(unittest.HomeserverTestCase): class SyncTypingTests(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, room.register_servlets, @@ -892,7 +890,6 @@ class DeviceListSyncTestCase(unittest.HomeserverTestCase): class ExcludeRoomTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 5fa3440691..c0f93f898a 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -137,6 +137,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): """Tests that a forbidden event is forbidden from being sent, but an allowed one can be sent. """ + # patch the rules module with a Mock which will return False for some event # types async def check( @@ -243,6 +244,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): def test_modify_event(self) -> None: """The module can return a modified version of the event""" + # first patch the event checker so that it will modify the event async def check( ev: EventBase, state: StateMap[EventBase] @@ -275,6 +277,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): def test_message_edit(self) -> None: """Ensure that the module doesn't cause issues with edited messages.""" + # first patch the event checker so that it will modify the event async def check( ev: EventBase, state: StateMap[EventBase] diff --git a/tests/rest/media/test_media_retention.py b/tests/rest/media/test_media_retention.py index 23f227aed6..b59d9dfd4d 100644 --- a/tests/rest/media/test_media_retention.py +++ b/tests/rest/media/test_media_retention.py @@ -31,7 +31,6 @@ from tests.utils import MockClock class MediaRetentionTestCase(unittest.HomeserverTestCase): - ONE_DAY_IN_MS = 24 * 60 * 60 * 1000 THIRTY_DAYS_IN_MS = 30 * ONE_DAY_IN_MS diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 17a3b06a8e..8ed27179c4 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -52,7 +52,6 @@ from tests.utils import default_config class MediaStorageTests(unittest.HomeserverTestCase): - needs_threadpool = True def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: @@ -207,7 +206,6 @@ class MediaRepoTests(unittest.HomeserverTestCase): user_id = "@test:user" def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - self.fetches: List[ Tuple[ "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]", @@ -268,7 +266,6 @@ class MediaRepoTests(unittest.HomeserverTestCase): return hs def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - media_resource = hs.get_media_repository_resource() self.download_resource = media_resource.children[b"download"] self.thumbnail_resource = media_resource.children[b"thumbnail"] diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 2c321f8d04..6fcf60ce19 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -58,7 +58,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() config["url_preview_enabled"] = True config["max_spider_size"] = 9999999 @@ -118,7 +117,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): return hs def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - self.media_repo = hs.get_media_repository_resource() self.preview_url = self.media_repo.children[b"preview_url"] @@ -133,7 +131,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): addressTypes: Optional[Sequence[Type[IAddress]]] = None, transportSemantics: str = "TCP", ) -> IResolutionReceiver: - resolution = HostResolution(hostName) resolutionReceiver.resolutionBegan(resolution) if hostName not in self.lookups: diff --git a/tests/server_notices/test_consent.py b/tests/server_notices/test_consent.py index 6540ed53f1..3fdf5a6d52 100644 --- a/tests/server_notices/test_consent.py +++ b/tests/server_notices/test_consent.py @@ -25,7 +25,6 @@ from tests import unittest class ConsentNoticesTests(unittest.HomeserverTestCase): - servlets = [ sync.register_servlets, synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -34,7 +33,6 @@ class ConsentNoticesTests(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - tmpdir = self.mktemp() os.mkdir(tmpdir) self.consent_notice_message = "consent %(consent_uri)s" diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py index 373707b275..b6d5c474b0 100644 --- a/tests/storage/databases/main/test_deviceinbox.py +++ b/tests/storage/databases/main/test_deviceinbox.py @@ -23,7 +23,6 @@ from tests.unittest import HomeserverTestCase class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): - servlets = [ admin.register_servlets, devices.register_servlets, diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py index ac77aec003..71db47405e 100644 --- a/tests/storage/databases/main/test_receipts.py +++ b/tests/storage/databases/main/test_receipts.py @@ -26,7 +26,6 @@ from tests.unittest import HomeserverTestCase class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase): - servlets = [ admin.register_servlets, room.register_servlets, @@ -62,6 +61,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase): keys and expected receipt key-values after duplicate receipts have been removed. """ + # First, undo the background update. def drop_receipts_unique_index(txn: LoggingTransaction) -> None: txn.execute(f"DROP INDEX IF EXISTS {index_name}") diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py index 3108ca3444..dbd8f3a85e 100644 --- a/tests/storage/databases/main/test_room.py +++ b/tests/storage/databases/main/test_room.py @@ -27,7 +27,6 @@ from tests.unittest import HomeserverTestCase class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): - servlets = [ admin.register_servlets, room.register_servlets, diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 7f7f4ef892..cd0079871c 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -656,7 +656,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): class ClientIpAuthTestCase(unittest.HomeserverTestCase): - servlets = [ synapse.rest.admin.register_servlets, login.register_servlets, diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index a10e5fa8b1..73d11e7786 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -417,7 +417,6 @@ class EventChainStoreTestCase(HomeserverTestCase): def fetch_chains( self, events: List[EventBase] ) -> Tuple[Dict[str, Tuple[int, int]], _LinkMap]: - # Fetch the map from event ID -> (chain ID, sequence number) rows = self.get_success( self.store.db_pool.simple_select_many_batch( @@ -492,7 +491,6 @@ class LinkMapTestCase(unittest.TestCase): class EventChainBackgroundUpdateTestCase(HomeserverTestCase): - servlets = [ admin.register_servlets, room.register_servlets, diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 8fc7936ab0..3e1984c15c 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -672,7 +672,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): complete_event_dict_map: Dict[str, JsonDict] = {} stream_ordering = 0 - for (event_id, prev_event_ids) in event_graph.items(): + for event_id, prev_event_ids in event_graph.items(): depth = depth_map[event_id] complete_event_dict_map[event_id] = { diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 76c06a9d1e..aa19c3bd30 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -774,7 +774,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual(r, 3) # add a bunch of dummy events to the events table - for (stream_ordering, ts) in ( + for stream_ordering, ts in ( (3, 110), (4, 120), (5, 120), diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py index d8f42c5d05..857e2caf2e 100644 --- a/tests/storage/test_purge.py +++ b/tests/storage/test_purge.py @@ -23,7 +23,6 @@ from tests.unittest import HomeserverTestCase class PurgeTests(HomeserverTestCase): - user_id = "@red:server" servlets = [room.register_servlets] diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 8794401823..f4c4661aaf 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -27,7 +27,6 @@ from tests.test_utils import event_injection class RoomMemberStoreTestCase(unittest.HomeserverTestCase): - servlets = [ login.register_servlets, register_servlets_for_client_rest_resource, @@ -35,7 +34,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None: # type: ignore[override] - # We can't test the RoomMemberStore on its own without the other event # storage logic self.store = hs.get_datastores().main @@ -48,7 +46,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): self.u_charlie = UserID.from_string("@charlie:elsewhere") def test_one_member(self) -> None: - # Alice creates the room, and is automatically joined self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index f730b888f7..e82c03f597 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -242,7 +242,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters out members # with types=[] - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -259,7 +259,7 @@ class StateStoreTestCase(HomeserverTestCase): state_dict, ) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -272,7 +272,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # with wildcard types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -289,7 +289,7 @@ class StateStoreTestCase(HomeserverTestCase): state_dict, ) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -309,7 +309,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # with specific types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -327,7 +327,7 @@ class StateStoreTestCase(HomeserverTestCase): state_dict, ) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -341,7 +341,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # with specific types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -392,7 +392,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters out members # with types=[] room_id = self.room.to_string() - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -404,7 +404,7 @@ class StateStoreTestCase(HomeserverTestCase): self.assertDictEqual({}, state_dict) room_id = self.room.to_string() - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -417,7 +417,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # wildcard types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -428,7 +428,7 @@ class StateStoreTestCase(HomeserverTestCase): self.assertEqual(is_all, False) self.assertDictEqual({}, state_dict) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -447,7 +447,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # with specific types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -459,7 +459,7 @@ class StateStoreTestCase(HomeserverTestCase): self.assertEqual(is_all, False) self.assertDictEqual({}, state_dict) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( @@ -473,7 +473,7 @@ class StateStoreTestCase(HomeserverTestCase): # test _get_state_for_group_using_cache correctly filters in members # with specific types - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_cache, group, state_filter=StateFilter( @@ -485,7 +485,7 @@ class StateStoreTestCase(HomeserverTestCase): self.assertEqual(is_all, False) self.assertDictEqual({}, state_dict) - (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( + state_dict, is_all = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( diff --git a/tests/test_mau.py b/tests/test_mau.py index 4e7665a22b..ff21098a59 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -32,7 +32,6 @@ from tests.utils import default_config class TestMauLimit(unittest.HomeserverTestCase): - servlets = [register.register_servlets, sync.register_servlets] def default_config(self) -> JsonDict: -- cgit 1.5.1 From a068ad7dd4910c81bb0886fbf986dde126eeb4ee Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 23 Feb 2023 19:14:17 +0100 Subject: Add information on uploaded media to user export command. (#15107) --- changelog.d/15107.feature | 1 + docs/usage/administration/admin_faq.md | 74 ++++++++++++++++++++++++++-------- synapse/app/admin_cmd.py | 10 +++++ synapse/handlers/admin.py | 38 +++++++++++++++++ tests/handlers/test_admin.py | 29 +++++++++++++ 5 files changed, 136 insertions(+), 16 deletions(-) create mode 100644 changelog.d/15107.feature (limited to 'synapse') diff --git a/changelog.d/15107.feature b/changelog.d/15107.feature new file mode 100644 index 0000000000..2bdb6a29fc --- /dev/null +++ b/changelog.d/15107.feature @@ -0,0 +1 @@ +Add media information to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.79/usage/administration/admin_faq.html#how-can-i-export-user-data). \ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 925e1d175e..28c3dd53a5 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -70,13 +70,55 @@ output-directory │ ├───state │ ├───invite_state │ └───knock_state -└───user_data - ├───account_data - │ ├───global - │ └─── - ├───connections - ├───devices - └───profile +├───user_data +│ ├───account_data +│ │ ├───global +│ │ └─── +│ ├───connections +│ ├───devices +│ └───profile +└───media_ids + └─── +``` + +The `media_ids` folder contains only the metadata of the media uploaded by the user. +It does not contain the media itself. +Furthermore, only the `media_ids` that Synapse manages itself are exported. +If another media repository (e.g. [matrix-media-repo](https://github.com/turt2live/matrix-media-repo)) +is used, the data must be exported separately. + +With the `media_ids` the media files can be downloaded. +Media that have been sent in encrypted rooms are only retrieved in encrypted form. +The following script can help with download the media files: + +```bash +#!/usr/bin/env bash + +# Parameters +# +# source_directory: Directory which contains the export with the media_ids. +# target_directory: Directory into which all files are to be downloaded. +# repository_url: Address of the media repository resp. media worker. +# serverName: Name of the server (`server_name` from homeserver.yaml). +# +# Example: +# ./download_media.sh /tmp/export_data/media_ids/ /tmp/export_data/media_files/ http://localhost:8008 matrix.example.com + +source_directory=$1 +target_directory=$2 +repository_url=$3 +serverName=$4 + +mkdir -p $target_directory + +for file in $source_directory/*; do + filename=$(basename ${file}) + url=$repository_url/_matrix/media/v3/download/$serverName/$filename + echo "Downloading $filename - $url" + if ! wget -o /dev/null -P $target_directory $url; then + echo "Could not download $filename" + fi +done ``` Manually resetting passwords @@ -87,7 +129,7 @@ can reset a user's password using the [admin API](../../admin_api/user_admin_api I have a problem with my server. Can I just delete my database and start again? --- -Deleting your database is unlikely to make anything better. +Deleting your database is unlikely to make anything better. It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated @@ -102,7 +144,7 @@ Come and seek help in https://matrix.to/#/#synapse:matrix.org. There are two exceptions when it might be sensible to delete your database and start again: * You have *never* joined any rooms which are federated with other servers. For -instance, a local deployment which the outside world can't talk to. +instance, a local deployment which the outside world can't talk to. * You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database. @@ -115,7 +157,7 @@ Using the following curl command: curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ ``` `` - can be obtained in riot by looking in the riot settings, down the bottom is: -Access Token:\ +Access Token:\ `` - the room alias, eg. #my_room:matrix.org this possibly needs to be URL encoded also, for example %23my_room%3Amatrix.org @@ -152,13 +194,13 @@ What are the biggest rooms on my server? --- ```sql -SELECT s.canonical_alias, g.room_id, count(*) AS num_rows -FROM - state_groups_state AS g, - room_stats_state AS s -WHERE g.room_id = s.room_id +SELECT s.canonical_alias, g.room_id, count(*) AS num_rows +FROM + state_groups_state AS g, + room_stats_state AS s +WHERE g.room_id = s.room_id GROUP BY s.canonical_alias, g.room_id -ORDER BY num_rows desc +ORDER BY num_rows desc LIMIT 10; ``` diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 5003777f0d..b05fe2c589 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -44,6 +44,7 @@ from synapse.storage.databases.main.event_push_actions import ( ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.filtering import FilteringWorkerStore +from synapse.storage.databases.main.media_repository import MediaRepositoryStore from synapse.storage.databases.main.profile import ProfileWorkerStore from synapse.storage.databases.main.push_rule import PushRulesWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore @@ -86,6 +87,7 @@ class AdminCmdSlavedStore( RegistrationWorkerStore, RoomWorkerStore, ProfileWorkerStore, + MediaRepositoryStore, ): def __init__( self, @@ -235,6 +237,14 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(account_data_file, "a") as f: json.dump(account_data, fp=f) + def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: + file_directory = os.path.join(self.base_directory, "media_ids") + os.makedirs(file_directory, exist_ok=True) + media_id_file = os.path.join(file_directory, media_id) + + with open(media_id_file, "w") as f: + json.dump(media_metadata, fp=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 8b7760b2cc..b06f25b03c 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -252,16 +252,19 @@ class AdminHandler: profile = await self.get_user(UserID.from_string(user_id)) if profile is not None: writer.write_profile(profile) + logger.info("[%s] Written profile", user_id) # Get all devices the user has devices = await self._device_handler.get_devices_by_user(user_id) writer.write_devices(devices) + logger.info("[%s] Written %s devices", user_id, len(devices)) # Get all connections the user has connections = await self.get_whois(UserID.from_string(user_id)) writer.write_connections( connections["devices"][""]["sessions"][0]["connections"] ) + logger.info("[%s] Written %s connections", user_id, len(connections)) # Get all account data the user has global and in rooms global_data = await self._store.get_global_account_data_for_user(user_id) @@ -269,6 +272,29 @@ class AdminHandler: writer.write_account_data("global", global_data) for room_id in by_room_data: writer.write_account_data(room_id, by_room_data[room_id]) + logger.info( + "[%s] Written account data for %s rooms", user_id, len(by_room_data) + ) + + # Get all media ids the user has + limit = 100 + start = 0 + while True: + media_ids, total = await self._store.get_local_media_by_user_paginate( + start, limit, user_id + ) + for media in media_ids: + writer.write_media_id(media["media_id"], media) + + logger.info( + "[%s] Written %d media_ids of %s", + user_id, + (start + len(media_ids)), + total, + ) + if (start + limit) >= total: + break + start += limit return writer.finished() @@ -359,6 +385,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): """ raise NotImplementedError() + @abc.abstractmethod + def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: + """Write the media's metadata of a user. + Exports only the metadata, as this can be fetched from the database via + read only. In order to access the files, a connection to the correct + media repository would be required. + + Args: + media_id: ID of the media. + media_metadata: Metadata of one media file. + """ + @abc.abstractmethod def finished(self) -> Any: """Called when all data has successfully been exported and written. diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index 1b97aaeed1..5569ccef8a 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, JoinRules from synapse.api.room_versions import RoomVersions from synapse.rest.client import knock, login, room from synapse.server import HomeServer +from synapse.types import UserID from synapse.util import Clock from tests import unittest @@ -323,3 +324,31 @@ class ExfiltrateData(unittest.HomeserverTestCase): args = writer.write_account_data.call_args_list[1][0] self.assertEqual(args[0], "test_room") self.assertEqual(args[1]["m.per_room"]["b"], 2) + + def test_media_ids(self) -> None: + """Tests that media's metadata get exported.""" + + self.get_success( + self._store.store_local_media( + media_id="media_1", + media_type="image/png", + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=50, + user_id=UserID.from_string(self.user2), + ) + ) + + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_media_id.assert_called_once() + + args = writer.write_media_id.call_args[0] + self.assertEqual(args[0], "media_1") + self.assertEqual(args[1]["media_id"], "media_1") + self.assertEqual(args[1]["media_length"], 50) + self.assertGreater(args[1]["created_ts"], 0) + self.assertIsNone(args[1]["upload_name"]) + self.assertIsNone(args[1]["last_access_ts"]) -- cgit 1.5.1 From ec79870f1422be47e8d6e85f315799888278969b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 23 Feb 2023 16:06:42 -0500 Subject: Fix a typo in MSC3873 config option. (#15138) Previously the experimental configuration option referred to the wrong MSC number. --- changelog.d/15138.misc | 1 + synapse/config/experimental.py | 4 ++-- synapse/push/bulk_push_rule_evaluator.py | 12 ++++++------ tests/push/test_push_rule_evaluator.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 changelog.d/15138.misc (limited to 'synapse') diff --git a/changelog.d/15138.misc b/changelog.d/15138.misc new file mode 100644 index 0000000000..fb706b27f2 --- /dev/null +++ b/changelog.d/15138.misc @@ -0,0 +1 @@ +Fix a typo in an experimental config setting. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 54c91953e1..bc38fae0b6 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -175,8 +175,8 @@ class ExperimentalConfig(Config): ) # MSC3873: Disambiguate event_match keys. - self.msc3783_escape_event_match_key = experimental.get( - "msc3783_escape_event_match_key", False + self.msc3873_escape_event_match_key = experimental.get( + "msc3873_escape_event_match_key", False ) # MSC3952: Intentional mentions, this depends on MSC3758. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 8f834be774..3c4a152d6b 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -276,7 +276,7 @@ class BulkPushRuleEvaluator: if related_event is not None: related_events[relation_type] = _flatten_dict( related_event, - msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key, ) reply_event_id = ( @@ -294,7 +294,7 @@ class BulkPushRuleEvaluator: if related_event is not None: related_events["m.in_reply_to"] = _flatten_dict( related_event, - msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key, ) # indicate that this is from a fallback relation. @@ -412,7 +412,7 @@ class BulkPushRuleEvaluator: evaluator = PushRuleEvaluator( _flatten_dict( event, - msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key, ), has_mentions, user_mentions, @@ -507,7 +507,7 @@ def _flatten_dict( prefix: Optional[List[str]] = None, result: Optional[Dict[str, JsonValue]] = None, *, - msc3783_escape_event_match_key: bool = False, + msc3873_escape_event_match_key: bool = False, ) -> Dict[str, JsonValue]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, @@ -536,7 +536,7 @@ def _flatten_dict( if result is None: result = {} for key, value in d.items(): - if msc3783_escape_event_match_key: + if msc3873_escape_event_match_key: # Escape periods in the key with a backslash (and backslashes with an # extra backslash). This is since a period is used as a separator between # nested fields. @@ -552,7 +552,7 @@ def _flatten_dict( value, prefix=(prefix + [key]), result=result, - msc3783_escape_event_match_key=msc3783_escape_event_match_key, + msc3873_escape_event_match_key=msc3873_escape_event_match_key, ) # `room_version` should only ever be set when looking at the top level of an event diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index d320a12f96..4e858fd16f 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -54,7 +54,7 @@ class FlattenDictTestCase(unittest.TestCase): self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input)) self.assertEqual( {"m\\.foo.b\\\\ar": "abc"}, - _flatten_dict(input, msc3783_escape_event_match_key=True), + _flatten_dict(input, msc3873_escape_event_match_key=True), ) def test_non_string(self) -> None: -- cgit 1.5.1 From f8a584ed0259cbb3c3a51726d1008d04c26b4bd8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 23 Feb 2023 16:07:46 -0500 Subject: Stop parsing the unspecced type parameter on thumbnail requests. (#15137) Ideally we would replace this with parsing of the Accept header or something else, but for now just make Synapse spec compliant by ignoring the unspecced parameter. It does not seem that this is ever sent by a client, and even if it is there's a reasonable fallback. --- changelog.d/15137.removal | 1 + synapse/rest/media/v1/thumbnail_resource.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15137.removal (limited to 'synapse') diff --git a/changelog.d/15137.removal b/changelog.d/15137.removal new file mode 100644 index 0000000000..c533b0c9dd --- /dev/null +++ b/changelog.d/15137.removal @@ -0,0 +1 @@ +Remove the undocumented and unspecced `type` parameter to the `/thumbnail` endpoint. diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 5f725c7600..3e720018b3 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -69,7 +69,8 @@ class ThumbnailResource(DirectServeJsonResource): width = parse_integer(request, "width", required=True) height = parse_integer(request, "height", required=True) method = parse_string(request, "method", "scale") - m_type = parse_string(request, "type", "image/png") + # TODO Parse the Accept header to get an prioritised list of thumbnail types. + m_type = "image/png" if server_name == self.server_name: if self.dynamic_thumbnails: -- cgit 1.5.1 From 682151a464f688768d5bd8308e16bd4024ad2e57 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 23 Feb 2023 16:08:53 -0500 Subject: Do not fail completely if oEmbed autodiscovery fails. (#15092) Previously if an autodiscovered oEmbed request failed (e.g. the oEmbed endpoint is down or does not exist) then the entire URL preview would fail. Instead we now return everything we can, even if this additional request fails. --- changelog.d/15092.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 33 ++++++++++++++------ tests/rest/media/v1/test_url_preview.py | 44 +++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 13 deletions(-) create mode 100644 changelog.d/15092.bugfix (limited to 'synapse') diff --git a/changelog.d/15092.bugfix b/changelog.d/15092.bugfix new file mode 100644 index 0000000000..67509c5c69 --- /dev/null +++ b/changelog.d/15092.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where a URL preview would break if the discovered oEmbed failed to download. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a8f6fd6b35..4a594ab9d8 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -163,6 +163,10 @@ class PreviewUrlResource(DirectServeJsonResource): 7. Stores the result in the database cache. 4. Returns the result. + If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or + image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole + does not fail. As much information as possible is returned. + The in-memory cache expires after 1 hour. Expired entries in the database cache (and their associated media files) are @@ -364,16 +368,25 @@ class PreviewUrlResource(DirectServeJsonResource): oembed_url = self._oembed.autodiscover_from_html(tree) og_from_oembed: JsonDict = {} if oembed_url: - oembed_info = await self._handle_url( - oembed_url, user, allow_data_urls=True - ) - ( - og_from_oembed, - author_name, - expiration_ms, - ) = await self._handle_oembed_response( - url, oembed_info, expiration_ms - ) + try: + oembed_info = await self._handle_url( + oembed_url, user, allow_data_urls=True + ) + except Exception as e: + # Fetching the oEmbed info failed, don't block the entire URL preview. + logger.warning( + "oEmbed fetch failed during URL preview: %s errored with %s", + oembed_url, + e, + ) + else: + ( + og_from_oembed, + author_name, + expiration_ms, + ) = await self._handle_oembed_response( + url, oembed_info, expiration_ms + ) # Parse Open Graph information from the HTML in case the oEmbed # response failed or is incomplete. diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 6fcf60ce19..2acfccec61 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -657,7 +657,7 @@ class URLPreviewTests(unittest.HomeserverTestCase): """If the preview image doesn't exist, ensure some data is returned.""" self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] - end_content = ( + result = ( b"""""" ) @@ -678,8 +678,8 @@ class URLPreviewTests(unittest.HomeserverTestCase): b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" b'Content-Type: text/html; charset="utf8"\r\n\r\n' ) - % (len(end_content),) - + end_content + % (len(result),) + + result ) self.pump() @@ -688,6 +688,44 @@ class URLPreviewTests(unittest.HomeserverTestCase): # The image should not be in the result. self.assertNotIn("og:image", channel.json_body) + def test_oembed_failure(self) -> None: + """If the autodiscovered oEmbed URL fails, ensure some data is returned.""" + self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] + + result = b""" + oEmbed Autodiscovery Fail + + """ + + channel = self.make_request( + "GET", + "preview_url?url=http://matrix.org", + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="utf8"\r\n\r\n' + ) + % (len(result),) + + result + ) + + self.pump() + self.assertEqual(channel.code, 200) + + # The image should not be in the result. + self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail") + def test_data_url(self) -> None: """ Requesting to preview a data URL is not supported. -- cgit 1.5.1 From 335f52d595c2c32e4b512b97e2851bc98b819ca7 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 24 Feb 2023 13:39:45 +0000 Subject: Improve handling of non-ASCII characters in user directory search (#15143) * Fix a long-standing bug where non-ASCII characters in search terms, including accented letters, would not match characters in a different case. * Fix a long-standing bug where search terms using combining accents would not match display names using precomposed accents and vice versa. To fully take effect, the user directory must be rebuilt after this change. Fixes #14630. Signed-off-by: Sean Quah --- changelog.d/15143.misc | 1 + synapse/storage/databases/main/user_directory.py | 52 ++++++++- tests/storage/test_user_directory.py | 133 +++++++++++++++++++++++ 3 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 changelog.d/15143.misc (limited to 'synapse') diff --git a/changelog.d/15143.misc b/changelog.d/15143.misc new file mode 100644 index 0000000000..cff4518811 --- /dev/null +++ b/changelog.d/15143.misc @@ -0,0 +1 @@ +Fix a long-standing bug where the user directory search was not case-insensitive for accented characters. diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index c3f2b61bd5..f16a509ac4 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -14,6 +14,7 @@ import logging import re +import unicodedata from typing import ( TYPE_CHECKING, Iterable, @@ -490,6 +491,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): values={"display_name": display_name, "avatar_url": avatar_url}, ) + # The display name that goes into the database index. + index_display_name = display_name + if index_display_name is not None: + index_display_name = _filter_text_for_index(index_display_name) + if isinstance(self.database_engine, PostgresEngine): # We weight the localpart most highly, then display name and finally # server name @@ -507,11 +513,15 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, + index_display_name, ), ) elif isinstance(self.database_engine, Sqlite3Engine): - value = "%s %s" % (user_id, display_name) if display_name else user_id + value = ( + "%s %s" % (user_id, index_display_name) + if index_display_name + else user_id + ) self.db_pool.simple_upsert_txn( txn, table="user_directory_search", @@ -896,6 +906,41 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): return {"limited": limited, "results": results[0:limit]} +def _filter_text_for_index(text: str) -> str: + """Transforms text before it is inserted into the user directory index, or searched + for in the user directory index. + + Note that the user directory search table needs to be rebuilt whenever this function + changes. + """ + # Lowercase the text, to make searches case-insensitive. + # This is necessary for both PostgreSQL and SQLite. PostgreSQL's + # `to_tsquery/to_tsvector` functions don't lowercase non-ASCII characters when using + # the "C" collation, while SQLite just doesn't lowercase non-ASCII characters at + # all. + text = text.lower() + + # Normalize the text. NFKC normalization has two effects: + # 1. It canonicalizes the text, ie. maps all visually identical strings to the same + # string. For example, ["e", "◌́"] is mapped to ["é"]. + # 2. It maps strings that are roughly equivalent to the same string. + # For example, ["dž"] is mapped to ["d", "ž"], ["①"] to ["1"] and ["i⁹"] to + # ["i", "9"]. + text = unicodedata.normalize("NFKC", text) + + # Note that nothing is done to make searches accent-insensitive. + # That could be achieved by converting to NFKD form instead (with combining accents + # split out) and filtering out combining accents using `unicodedata.combining(c)`. + # The downside of this may be noisier search results, since search terms with + # explicit accents will match characters with no accents, or completely different + # accents. + # + # text = unicodedata.normalize("NFKD", text) + # text = "".join([c for c in text if not unicodedata.combining(c)]) + + return text + + def _parse_query_sqlite(search_term: str) -> str: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. @@ -905,6 +950,7 @@ def _parse_query_sqlite(search_term: str) -> str: We specifically add both a prefix and non prefix matching term so that exact matches get ranked higher. """ + search_term = _filter_text_for_index(search_term) # Pull out the individual words, discarding any non-word characters. results = _parse_words(search_term) @@ -917,6 +963,8 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]: We use this so that we can add prefix matching, which isn't something that is supported by default. """ + search_term = _filter_text_for_index(search_term) + escaped_words = [] for word in _parse_words(search_term): # Postgres tsvector and tsquery quoting rules: diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 2d169684cf..43b724c4dd 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -504,6 +504,139 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_ascii_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case. + """ + CHARLIE = "@someuser:example.org" + self.get_success( + self.store.update_profile_in_user_dir(CHARLIE, "Charlie", None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, "cHARLIE", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": CHARLIE, "display_name": "Charlie", "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_unicode_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case. + """ + IVAN = "@someuser:example.org" + self.get_success(self.store.update_profile_in_user_dir(IVAN, "Иван", None)) + + r = self.get_success(self.store.search_user_dir(ALICE, "иВАН", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": IVAN, "display_name": "Иван", "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_dotted_dotless_i_case_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name in a + different case, when their name contains dotted or dotless "i"s. + + Some languages have dotted and dotless versions of "i", which are considered to + be different letters: i <-> İ, ı <-> I. To make things difficult, they reuse the + ASCII "i" and "I" code points, despite having different lowercase / uppercase + forms. + """ + USER = "@someuser:example.org" + + expected_matches = [ + # (search_term, display_name) + # A search for "i" should match "İ". + ("iiiii", "İİİİİ"), + # A search for "I" should match "ı". + ("IIIII", "ııııı"), + # A search for "ı" should match "I". + ("ııııı", "IIIII"), + # A search for "İ" should match "i". + ("İİİİİ", "iiiii"), + ] + + for search_term, display_name in expected_matches: + self.get_success( + self.store.update_profile_in_user_dir(USER, display_name, None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10)) + self.assertFalse(r["limited"]) + self.assertEqual( + 1, + len(r["results"]), + f"searching for {search_term!r} did not match {display_name!r}", + ) + self.assertDictEqual( + r["results"][0], + {"user_id": USER, "display_name": display_name, "avatar_url": None}, + ) + + # We don't test for negative matches, to allow implementations that consider all + # the i variants to be the same. + + test_search_user_dir_dotted_dotless_i_case_insensitivity.skip = "not supported" # type: ignore + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_unicode_normalization(self) -> None: + """Tests that a user can look up another user by searching for their name with + either composed or decomposed accents. + """ + AMELIE = "@someuser:example.org" + + expected_matches = [ + # (search_term, display_name) + ("Ame\u0301lie", "Amélie"), + ("Amélie", "Ame\u0301lie"), + ] + + for search_term, display_name in expected_matches: + self.get_success( + self.store.update_profile_in_user_dir(AMELIE, display_name, None) + ) + + r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10)) + self.assertFalse(r["limited"]) + self.assertEqual( + 1, + len(r["results"]), + f"searching for {search_term!r} did not match {display_name!r}", + ) + self.assertDictEqual( + r["results"][0], + {"user_id": AMELIE, "display_name": display_name, "avatar_url": None}, + ) + + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_accent_insensitivity(self) -> None: + """Tests that a user can look up another user by searching for their name + without any accents. + """ + AMELIE = "@someuser:example.org" + self.get_success(self.store.update_profile_in_user_dir(AMELIE, "Amélie", None)) + + r = self.get_success(self.store.search_user_dir(ALICE, "amelie", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": AMELIE, "display_name": "Amélie", "avatar_url": None}, + ) + + # It may be desirable for "é"s in search terms to not match plain "e"s and we + # really don't want "é"s in search terms to match "e"s with different accents. + # But we don't test for this to allow implementations that consider all + # "e"-lookalikes to be the same. + + test_search_user_dir_accent_insensitivity.skip = "not supported yet" # type: ignore + class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase): use_icu = True -- cgit 1.5.1 From b2357a898cdd1f4a2222609abfe471801ea88dcd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 24 Feb 2023 14:39:50 +0000 Subject: Fix bug where 5s delays would occasionally happen. (#15150) This only affects deployments using workers. --- changelog.d/15150.bugfix | 1 + synapse/replication/tcp/resource.py | 18 +++++++++++ tests/replication/tcp/test_handler.py | 61 +++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 changelog.d/15150.bugfix (limited to 'synapse') diff --git a/changelog.d/15150.bugfix b/changelog.d/15150.bugfix new file mode 100644 index 0000000000..8668bc587f --- /dev/null +++ b/changelog.d/15150.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.76 where 5s delays would occasionally occur in deployments using workers. diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 9d17eff714..347467d863 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -238,6 +238,24 @@ class ReplicationStreamer: except Exception: logger.exception("Failed to replicate") + # The last token we send may not match the current + # token, in which case we want to send out a `POSITION` + # to tell other workers the actual current position. + if updates[-1][0] < current_token: + logger.info( + "Sending position: %s -> %s", + stream.NAME, + current_token, + ) + self.command_handler.send_command( + PositionCommand( + stream.NAME, + self._instance_name, + updates[-1][0], + current_token, + ) + ) + logger.debug("No more pending updates, breaking poke loop") finally: self.pending_updates = False diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py index bf927beb6a..bab77b2df7 100644 --- a/tests/replication/tcp/test_handler.py +++ b/tests/replication/tcp/test_handler.py @@ -141,3 +141,64 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase): self.get_success(ctx_worker1.__aexit__(None, None, None)) self.assertTrue(d.called) + + def test_wait_for_stream_position_rdata(self) -> None: + """Check that wait for stream position correctly waits for an update + from the correct instance, when RDATA is sent. + """ + store = self.hs.get_datastores().main + cmd_handler = self.hs.get_replication_command_handler() + data_handler = self.hs.get_replication_data_handler() + + worker1 = self.make_worker_hs( + "synapse.app.generic_worker", + extra_config={ + "worker_name": "worker1", + "run_background_tasks_on": "worker1", + "redis": {"enabled": True}, + }, + ) + + cache_id_gen = worker1.get_datastores().main._cache_id_gen + assert cache_id_gen is not None + + self.replicate() + + # First, make sure the master knows that `worker1` exists. + initial_token = cache_id_gen.get_current_token() + cmd_handler.send_command( + PositionCommand("caches", "worker1", initial_token, initial_token) + ) + self.replicate() + + # `wait_for_stream_position` should only return once master receives a + # notification that `next_token2` has persisted. + ctx_worker1 = cache_id_gen.get_next_mult(2) + next_token1, next_token2 = self.get_success(ctx_worker1.__aenter__()) + + d = defer.ensureDeferred( + data_handler.wait_for_stream_position("worker1", "caches", next_token2) + ) + self.assertFalse(d.called) + + # Insert an entry into the cache stream with token `next_token1`, but + # not `next_token2`. + self.get_success( + store.db_pool.simple_insert( + table="cache_invalidation_stream_by_instance", + values={ + "stream_id": next_token1, + "instance_name": "worker1", + "cache_func": "foo", + "keys": [], + "invalidation_ts": 0, + }, + ) + ) + + # Finish the context manager, triggering the data to be sent to master. + self.get_success(ctx_worker1.__aexit__(None, None, None)) + + # Master should get told about `next_token2`, so the deferred should + # resolve. + self.assertTrue(d.called) -- cgit 1.5.1 From 1c95ddd09bbc46046a3412e7bb03a87aa3b6f65a Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 24 Feb 2023 13:15:29 -0800 Subject: Batch up storing state groups when creating new room (#14918) --- changelog.d/14918.misc | 1 + synapse/events/snapshot.py | 49 +++++++++++ synapse/handlers/message.py | 16 ++-- synapse/handlers/room.py | 37 ++++---- synapse/handlers/room_batch.py | 4 +- synapse/handlers/room_member.py | 13 ++- synapse/storage/databases/state/store.py | 119 ++++++++++++++++++++++++++ tests/handlers/test_message.py | 25 ++++-- tests/handlers/test_register.py | 3 +- tests/push/test_bulk_push_rule_evaluator.py | 13 +-- tests/rest/client/test_rooms.py | 4 +- tests/storage/test_event_chain.py | 6 +- tests/storage/test_state.py | 126 ++++++++++++++++++++++++++++ tests/unittest.py | 4 +- 14 files changed, 371 insertions(+), 49 deletions(-) create mode 100644 changelog.d/14918.misc (limited to 'synapse') diff --git a/changelog.d/14918.misc b/changelog.d/14918.misc new file mode 100644 index 0000000000..828794354a --- /dev/null +++ b/changelog.d/14918.misc @@ -0,0 +1 @@ +Batch up storing state groups when creating a new room. \ No newline at end of file diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e0d82ad81c..a91a5d1e3c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -23,6 +23,7 @@ from synapse.types import JsonDict, StateMap if TYPE_CHECKING: from synapse.storage.controllers import StorageControllers + from synapse.storage.databases import StateGroupDataStore from synapse.storage.databases.main import DataStore from synapse.types.state import StateFilter @@ -348,6 +349,54 @@ class UnpersistedEventContext(UnpersistedEventContextBase): partial_state: bool state_map_before_event: Optional[StateMap[str]] = None + @classmethod + async def batch_persist_unpersisted_contexts( + cls, + events_and_context: List[Tuple[EventBase, "UnpersistedEventContextBase"]], + room_id: str, + last_known_state_group: int, + datastore: "StateGroupDataStore", + ) -> List[Tuple[EventBase, EventContext]]: + """ + Takes a list of events and their associated unpersisted contexts and persists + the unpersisted contexts, returning a list of events and persisted contexts. + Note that all the events must be in a linear chain (ie a <- b <- c). + + Args: + events_and_context: A list of events and their unpersisted contexts + room_id: the room_id for the events + last_known_state_group: the last persisted state group + datastore: a state datastore + """ + amended_events_and_context = await datastore.store_state_deltas_for_batched( + events_and_context, room_id, last_known_state_group + ) + + events_and_persisted_context = [] + for event, unpersisted_context in amended_events_and_context: + if event.is_state(): + context = EventContext( + storage=unpersisted_context._storage, + state_group=unpersisted_context.state_group_after_event, + state_group_before_event=unpersisted_context.state_group_before_event, + state_delta_due_to_event=unpersisted_context.state_delta_due_to_event, + partial_state=unpersisted_context.partial_state, + prev_group=unpersisted_context.state_group_before_event, + delta_ids=unpersisted_context.state_delta_due_to_event, + ) + else: + context = EventContext( + storage=unpersisted_context._storage, + state_group=unpersisted_context.state_group_after_event, + state_group_before_event=unpersisted_context.state_group_before_event, + state_delta_due_to_event=unpersisted_context.state_delta_due_to_event, + partial_state=unpersisted_context.partial_state, + prev_group=unpersisted_context.prev_group_for_state_group_before_event, + delta_ids=unpersisted_context.delta_ids_to_state_group_before_event, + ) + events_and_persisted_context.append((event, context)) + return events_and_persisted_context + async def get_prev_state_ids( self, state_filter: Optional["StateFilter"] = None ) -> StateMap[str]: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index aa90d0000d..e433d6b01f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -574,7 +574,7 @@ class EventCreationHandler: state_map: Optional[StateMap[str]] = None, for_batch: bool = False, current_state_group: Optional[int] = None, - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """ Given a dict from a client, create a new event. If bool for_batch is true, will create an event using the prev_event_ids, and will create an event context for @@ -721,8 +721,6 @@ class EventCreationHandler: current_state_group=current_state_group, ) - context = await unpersisted_context.persist(event) - # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this # behaviour. Another reason is that this code is also evaluated each time a new @@ -739,7 +737,7 @@ class EventCreationHandler: assert state_map is not None prev_event_id = state_map.get((EventTypes.Member, event.sender)) else: - prev_state_ids = await context.get_prev_state_ids( + prev_state_ids = await unpersisted_context.get_prev_state_ids( StateFilter.from_types([(EventTypes.Member, None)]) ) prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender)) @@ -764,8 +762,7 @@ class EventCreationHandler: ) self.validator.validate_new(event, self.config) - - return event, context + return event, unpersisted_context async def _is_exempt_from_privacy_policy( self, builder: EventBuilder, requester: Requester @@ -1005,7 +1002,7 @@ class EventCreationHandler: max_retries = 5 for i in range(max_retries): try: - event, context = await self.create_event( + event, unpersisted_context = await self.create_event( requester, event_dict, txn_id=txn_id, @@ -1016,6 +1013,7 @@ class EventCreationHandler: historical=historical, depth=depth, ) + context = await unpersisted_context.persist(event) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( event.sender, @@ -1190,7 +1188,6 @@ class EventCreationHandler: if for_batch: assert prev_event_ids is not None assert state_map is not None - assert current_state_group is not None auth_ids = self._event_auth_handler.compute_auth_events(builder, state_map) event = await builder.build( prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth @@ -2046,7 +2043,7 @@ class EventCreationHandler: max_retries = 5 for i in range(max_retries): try: - event, context = await self.create_event( + event, unpersisted_context = await self.create_event( requester, { "type": EventTypes.Dummy, @@ -2055,6 +2052,7 @@ class EventCreationHandler: "sender": user_id, }, ) + context = await unpersisted_context.persist(event) event.internal_metadata.proactively_send = False diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a26ec02284..b1784638f4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -51,6 +51,7 @@ from synapse.api.filtering import Filter from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase +from synapse.events.snapshot import UnpersistedEventContext from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM @@ -211,7 +212,7 @@ class RoomCreationHandler: # the required power level to send the tombstone event. ( tombstone_event, - tombstone_context, + tombstone_unpersisted_context, ) = await self.event_creation_handler.create_event( requester, { @@ -225,6 +226,9 @@ class RoomCreationHandler: }, }, ) + tombstone_context = await tombstone_unpersisted_context.persist( + tombstone_event + ) validate_event_for_room_version(tombstone_event) await self._event_auth_handler.check_auth_rules_from_context( tombstone_event @@ -1092,7 +1096,7 @@ class RoomCreationHandler: content: JsonDict, for_batch: bool, **kwargs: Any, - ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]: + ) -> Tuple[EventBase, synapse.events.snapshot.UnpersistedEventContextBase]: """ Creates an event and associated event context. Args: @@ -1111,20 +1115,23 @@ class RoomCreationHandler: event_dict = create_event_dict(etype, content, **kwargs) - new_event, new_context = await self.event_creation_handler.create_event( + ( + new_event, + new_unpersisted_context, + ) = await self.event_creation_handler.create_event( creator, event_dict, prev_event_ids=prev_event, depth=depth, state_map=state_map, for_batch=for_batch, - current_state_group=current_state_group, ) + depth += 1 prev_event = [new_event.event_id] state_map[(new_event.type, new_event.state_key)] = new_event.event_id - return new_event, new_context + return new_event, new_unpersisted_context try: config = self._presets_dict[preset_config] @@ -1134,10 +1141,10 @@ class RoomCreationHandler: ) creation_content.update({"creator": creator_id}) - creation_event, creation_context = await create_event( + creation_event, unpersisted_creation_context = await create_event( EventTypes.Create, creation_content, False ) - + creation_context = await unpersisted_creation_context.persist(creation_event) logger.debug("Sending %s in new room", EventTypes.Member) ev = await self.event_creation_handler.handle_new_client_event( requester=creator, @@ -1181,7 +1188,6 @@ class RoomCreationHandler: power_event, power_context = await create_event( EventTypes.PowerLevels, pl_content, True ) - current_state_group = power_context._state_group events_to_send.append((power_event, power_context)) else: power_level_content: JsonDict = { @@ -1230,14 +1236,12 @@ class RoomCreationHandler: power_level_content, True, ) - current_state_group = pl_context._state_group events_to_send.append((pl_event, pl_context)) if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state: room_alias_event, room_alias_context = await create_event( EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True ) - current_state_group = room_alias_context._state_group events_to_send.append((room_alias_event, room_alias_context)) if (EventTypes.JoinRules, "") not in initial_state: @@ -1246,7 +1250,6 @@ class RoomCreationHandler: {"join_rule": config["join_rules"]}, True, ) - current_state_group = join_rules_context._state_group events_to_send.append((join_rules_event, join_rules_context)) if (EventTypes.RoomHistoryVisibility, "") not in initial_state: @@ -1255,7 +1258,6 @@ class RoomCreationHandler: {"history_visibility": config["history_visibility"]}, True, ) - current_state_group = visibility_context._state_group events_to_send.append((visibility_event, visibility_context)) if config["guest_can_join"]: @@ -1265,14 +1267,12 @@ class RoomCreationHandler: {EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN}, True, ) - current_state_group = guest_access_context._state_group events_to_send.append((guest_access_event, guest_access_context)) for (etype, state_key), content in initial_state.items(): event, context = await create_event( etype, content, True, state_key=state_key ) - current_state_group = context._state_group events_to_send.append((event, context)) if config["encrypted"]: @@ -1284,9 +1284,16 @@ class RoomCreationHandler: ) events_to_send.append((encryption_event, encryption_context)) + datastore = self.hs.get_datastores().state + events_and_context = ( + await UnpersistedEventContext.batch_persist_unpersisted_contexts( + events_to_send, room_id, current_state_group, datastore + ) + ) + last_event = await self.event_creation_handler.handle_new_client_event( creator, - events_to_send, + events_and_context, ignore_shadow_ban=True, ratelimit=False, ) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 5d4ca0e2d2..bf9df60218 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -327,7 +327,7 @@ class RoomBatchHandler: # Mark all events as historical event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True - event, context = await self.event_creation_handler.create_event( + event, unpersisted_context = await self.event_creation_handler.create_event( await self.create_requester_for_user_id_from_app_service( ev["sender"], app_service_requester.app_service ), @@ -345,7 +345,7 @@ class RoomBatchHandler: historical=True, depth=inherited_depth, ) - + context = await unpersisted_context.persist(event) assert context._state_group # Normally this is done when persisting the event but we have to diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a965c7ec76..de7476f300 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -414,7 +414,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): max_retries = 5 for i in range(max_retries): try: - event, context = await self.event_creation_handler.create_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_event( requester, { "type": EventTypes.Member, @@ -435,7 +438,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): outlier=outlier, historical=historical, ) - + context = await unpersisted_context.persist(event) prev_state_ids = await context.get_prev_state_ids( StateFilter.from_types([(EventTypes.Member, None)]) ) @@ -1944,7 +1947,10 @@ class RoomMemberMasterHandler(RoomMemberHandler): max_retries = 5 for i in range(max_retries): try: - event, context = await self.event_creation_handler.create_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_event( requester, event_dict, txn_id=txn_id, @@ -1952,6 +1958,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): auth_event_ids=auth_event_ids, outlier=True, ) + context = await unpersisted_context.persist(event) event.internal_metadata.out_of_band_membership = True result_event = ( diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 89b1faa6c8..bf4cdfdf29 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Se import attr from synapse.api.constants import EventTypes +from synapse.events import EventBase +from synapse.events.snapshot import UnpersistedEventContext, UnpersistedEventContextBase from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -401,6 +403,123 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): fetched_keys=non_member_types, ) + async def store_state_deltas_for_batched( + self, + events_and_context: List[Tuple[EventBase, UnpersistedEventContextBase]], + room_id: str, + prev_group: int, + ) -> List[Tuple[EventBase, UnpersistedEventContext]]: + """Generate and store state deltas for a group of events and contexts created to be + batch persisted. Note that all the events must be in a linear chain (ie a <- b <- c). + + Args: + events_and_context: the events to generate and store a state groups for + and their associated contexts + room_id: the id of the room the events were created for + prev_group: the state group of the last event persisted before the batched events + were created + """ + + def insert_deltas_group_txn( + txn: LoggingTransaction, + events_and_context: List[Tuple[EventBase, UnpersistedEventContext]], + prev_group: int, + ) -> List[Tuple[EventBase, UnpersistedEventContext]]: + """Generate and store state groups for the provided events and contexts. + + Requires that we have the state as a delta from the last persisted state group. + + Returns: + A list of state groups + """ + is_in_db = self.db_pool.simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + num_state_groups = sum( + 1 for event, _ in events_and_context if event.is_state() + ) + + state_groups = self._state_group_seq_gen.get_next_mult_txn( + txn, num_state_groups + ) + + sg_before = prev_group + state_group_iter = iter(state_groups) + for event, context in events_and_context: + if not event.is_state(): + context.state_group_after_event = sg_before + context.state_group_before_event = sg_before + continue + + sg_after = next(state_group_iter) + context.state_group_after_event = sg_after + context.state_group_before_event = sg_before + context.state_delta_due_to_event = { + (event.type, event.state_key): event.event_id + } + sg_before = sg_after + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups", + keys=("id", "room_id", "event_id"), + values=[ + (context.state_group_after_event, room_id, event.event_id) + for event, context in events_and_context + if event.is_state() + ], + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_group_edges", + keys=("state_group", "prev_state_group"), + values=[ + ( + context.state_group_after_event, + context.state_group_before_event, + ) + for event, context in events_and_context + if event.is_state() + ], + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + ( + context.state_group_after_event, + room_id, + key[0], + key[1], + state_id, + ) + for event, context in events_and_context + if context.state_delta_due_to_event is not None + for key, state_id in context.state_delta_due_to_event.items() + ], + ) + return events_and_context + + return await self.db_pool.runInteraction( + "store_state_deltas_for_batched.insert_deltas_group", + insert_deltas_group_txn, + events_and_context, + prev_group, + ) + async def store_state_group( self, event_id: str, diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py index 69d384442f..9691d66b48 100644 --- a/tests/handlers/test_message.py +++ b/tests/handlers/test_message.py @@ -18,7 +18,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -79,7 +79,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase): return memberEvent, memberEventContext - def _create_duplicate_event(self, txn_id: str) -> Tuple[EventBase, EventContext]: + def _create_duplicate_event( + self, txn_id: str + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """Create a new event with the given transaction ID. All events produced by this method will be considered duplicates. """ @@ -107,7 +109,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): txn_id = "something_suitably_random" - event1, context = self._create_duplicate_event(txn_id) + event1, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event1)) ret_event1 = self.get_success( self.handler.handle_new_client_event( @@ -119,7 +122,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): self.assertEqual(event1.event_id, ret_event1.event_id) - event2, context = self._create_duplicate_event(txn_id) + event2, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event2)) # We want to test that the deduplication at the persit event end works, # so we want to make sure we test with different events. @@ -140,7 +144,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase): # Let's test that calling `persist_event` directly also does the right # thing. - event3, context = self._create_duplicate_event(txn_id) + event3, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event3)) + self.assertNotEqual(event1.event_id, event3.event_id) ret_event3, event_pos3, _ = self.get_success( @@ -154,7 +160,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase): # Let's test that calling `persist_events` directly also does the right # thing. - event4, context = self._create_duplicate_event(txn_id) + event4, unpersisted_context = self._create_duplicate_event(txn_id) + context = self.get_success(unpersisted_context.persist(event4)) self.assertNotEqual(event1.event_id, event3.event_id) events, _ = self.get_success( @@ -174,8 +181,10 @@ class EventCreationTestCase(unittest.HomeserverTestCase): txn_id = "something_else_suitably_random" # Create two duplicate events to persist at the same time - event1, context1 = self._create_duplicate_event(txn_id) - event2, context2 = self._create_duplicate_event(txn_id) + event1, unpersisted_context1 = self._create_duplicate_event(txn_id) + context1 = self.get_success(unpersisted_context1.persist(event1)) + event2, unpersisted_context2 = self._create_duplicate_event(txn_id) + context2 = self.get_success(unpersisted_context2.persist(event2)) # Ensure their event IDs are different to start with self.assertNotEqual(event1.event_id, event2.event_id) diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 1db99b3c00..aff1ec4758 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -507,7 +507,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Lower the permissions of the inviter. event_creation_handler = self.hs.get_event_creation_handler() requester = create_requester(inviter) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creation_handler.create_event( requester, { @@ -519,6 +519,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_creation_handler.handle_new_client_event( requester, events_and_context=[(event, context)] diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index dce6899e78..1458076a90 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -130,7 +130,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): # Create a new message event, and try to evaluate it under the dodgy # power level event. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -145,6 +145,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): prev_event_ids=[pl_event_id], ) ) + context = self.get_success(unpersisted_context.persist(event)) bulk_evaluator = BulkPushRuleEvaluator(self.hs) # should not raise @@ -170,7 +171,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): """Ensure that push rules are not calculated when disabled in the config""" # Create a new message event which should cause a notification. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -184,6 +185,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) bulk_evaluator = BulkPushRuleEvaluator(self.hs) # Mock the method which calculates push rules -- we do this instead of @@ -200,7 +202,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) -> bool: """Returns true iff the `mentions` trigger an event push action.""" # Create a new message event which should cause a notification. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -211,7 +213,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) - + context = self.get_success(unpersisted_context.persist(event)) # Execute the push rule machinery. self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) @@ -390,7 +392,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): bulk_evaluator = BulkPushRuleEvaluator(self.hs) # Create & persist an event to use as the parent of the relation. - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_event( self.requester, { @@ -404,6 +406,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): }, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self.event_creation_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 4dd763096d..a4900703c4 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -713,7 +713,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(33, channel.resource_usage.db_txn_count) + self.assertEqual(30, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -726,7 +726,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(36, channel.resource_usage.db_txn_count) + self.assertEqual(32, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 73d11e7786..e39b63edac 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -522,7 +522,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): latest_event_ids = self.get_success( self.store.get_prev_events_for_room(room_id) ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_handler.create_event( self.requester, { @@ -535,6 +535,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): prev_event_ids=latest_event_ids, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] @@ -544,7 +545,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): assert state_ids1 is not None state1 = set(state_ids1.values()) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_handler.create_event( self.requester, { @@ -557,6 +558,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase): prev_event_ids=latest_event_ids, ) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( event_handler.handle_new_client_event( self.requester, events_and_context=[(event, context)] diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index e82c03f597..62aed6af0a 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -496,3 +496,129 @@ class StateStoreTestCase(HomeserverTestCase): self.assertEqual(is_all, True) self.assertDictEqual({(e5.type, e5.state_key): e5.event_id}, state_dict) + + def test_batched_state_group_storing(self) -> None: + creation_event = self.inject_state_event( + self.room, self.u_alice, EventTypes.Create, "", {} + ) + state_to_event = self.get_success( + self.storage.state.get_state_groups( + self.room.to_string(), [creation_event.event_id] + ) + ) + current_state_group = list(state_to_event.keys())[0] + + # create some unpersisted events and event contexts to store against room + events_and_context = [] + builder = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Name, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"name": "first rename of room"}, + }, + ) + + event1, unpersisted_context1 = self.get_success( + self.event_creation_handler.create_new_client_event(builder) + ) + events_and_context.append((event1, unpersisted_context1)) + + builder2 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.JoinRules, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"join_rule": "private"}, + }, + ) + + event2, unpersisted_context2 = self.get_success( + self.event_creation_handler.create_new_client_event(builder2) + ) + events_and_context.append((event2, unpersisted_context2)) + + builder3 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Message, + "sender": self.u_alice.to_string(), + "room_id": self.room.to_string(), + "content": {"body": "hello from event 3", "msgtype": "m.text"}, + }, + ) + + event3, unpersisted_context3 = self.get_success( + self.event_creation_handler.create_new_client_event(builder3) + ) + events_and_context.append((event3, unpersisted_context3)) + + builder4 = self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.JoinRules, + "sender": self.u_alice.to_string(), + "state_key": "", + "room_id": self.room.to_string(), + "content": {"join_rule": "public"}, + }, + ) + + event4, unpersisted_context4 = self.get_success( + self.event_creation_handler.create_new_client_event(builder4) + ) + events_and_context.append((event4, unpersisted_context4)) + + processed_events_and_context = self.get_success( + self.hs.get_datastores().state.store_state_deltas_for_batched( + events_and_context, self.room.to_string(), current_state_group + ) + ) + + # check that only state events are in state_groups, and all state events are in state_groups + res = self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups", + keyvalues=None, + retcols=("event_id",), + ) + ) + + events = [] + for result in res: + self.assertNotIn(event3.event_id, result) + events.append(result.get("event_id")) + + for event, _ in processed_events_and_context: + if event.is_state(): + self.assertIn(event.event_id, events) + + # check that each unique state has state group in state_groups_state and that the + # type/state key is correct, and check that each state event's state group + # has an entry and prev event in state_group_edges + for event, context in processed_events_and_context: + if event.is_state(): + state = self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups_state", + keyvalues={"state_group": context.state_group_after_event}, + retcols=("type", "state_key"), + ) + ) + self.assertEqual(event.type, state[0].get("type")) + self.assertEqual(event.state_key, state[0].get("state_key")) + + groups = self.get_success( + self.store.db_pool.simple_select_list( + table="state_group_edges", + keyvalues={"state_group": str(context.state_group_after_event)}, + retcols=("*",), + ) + ) + self.assertEqual( + context.state_group_before_event, groups[0].get("prev_state_group") + ) diff --git a/tests/unittest.py b/tests/unittest.py index b21e7f1221..f9160faa1d 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -723,7 +723,7 @@ class HomeserverTestCase(TestCase): event_creator = self.hs.get_event_creation_handler() requester = create_requester(user) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creator.create_event( requester, { @@ -735,7 +735,7 @@ class HomeserverTestCase(TestCase): prev_event_ids=prev_event_ids, ) ) - + context = self.get_success(unpersisted_context.persist(event)) if soft_failed: event.internal_metadata.soft_failed = True -- cgit 1.5.1 From 3f2ef205e228282a8a744db59115caa4b17da9a1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 27 Feb 2023 13:03:22 +0000 Subject: Small fixes to `MatrixFederationHttpClient` docstrings (#15148) --- changelog.d/15148.doc | 1 + synapse/http/matrixfederationclient.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 changelog.d/15148.doc (limited to 'synapse') diff --git a/changelog.d/15148.doc b/changelog.d/15148.doc new file mode 100644 index 0000000000..4e9e163306 --- /dev/null +++ b/changelog.d/15148.doc @@ -0,0 +1 @@ +Correct small documentation errors in some `MatrixFederationHttpClient` methods. \ No newline at end of file diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 312aab4dcc..3302d4e48a 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -440,7 +440,7 @@ class MatrixFederationHttpClient: Args: request: details of request to be sent - retry_on_dns_fail: true if the request should be retied on DNS failures + retry_on_dns_fail: true if the request should be retried on DNS failures timeout: number of milliseconds to wait for the response headers (including connecting to the server), *for each attempt*. @@ -475,7 +475,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -871,7 +871,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -958,7 +958,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1036,6 +1036,8 @@ class MatrixFederationHttpClient: args: A dictionary used to create query strings, defaults to None. + retry_on_dns_fail: true if the request should be retried on DNS failures + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. @@ -1063,7 +1065,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1141,7 +1143,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. @@ -1197,7 +1199,7 @@ class MatrixFederationHttpClient: (except 429). NotRetryingDestination: If we are not yet ready to retry this server. - FederationDeniedError: If this destination is not on our + FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. -- cgit 1.5.1 From 4fc8875876374ec8f97a3b3cc344a4e3abcf769f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 27 Feb 2023 08:26:05 -0500 Subject: Refactor media modules. (#15146) * Removes the `v1` directory from `test.rest.media.v1`. * Moves the non-REST code from `synapse.rest.media.v1` to `synapse.media`. * Flatten the `v1` directory from `synapse.rest.media`, but leave compatiblity with 3rd party media repositories and spam checkers. --- changelog.d/15146.misc | 1 + synapse/_scripts/move_remote_media_to_new_store.py | 2 +- synapse/config/repository.py | 12 +- synapse/events/spamcheck.py | 4 +- synapse/media/_base.py | 479 ++++++++ synapse/media/filepath.py | 410 +++++++ synapse/media/media_repository.py | 1038 ++++++++++++++++ synapse/media/media_storage.py | 374 ++++++ synapse/media/oembed.py | 265 +++++ synapse/media/preview_html.py | 501 ++++++++ synapse/media/storage_provider.py | 181 +++ synapse/media/thumbnailer.py | 221 ++++ synapse/rest/media/config_resource.py | 41 + synapse/rest/media/download_resource.py | 75 ++ synapse/rest/media/media_repository_resource.py | 93 ++ synapse/rest/media/preview_url_resource.py | 869 ++++++++++++++ synapse/rest/media/thumbnail_resource.py | 554 +++++++++ synapse/rest/media/upload_resource.py | 108 ++ synapse/rest/media/v1/_base.py | 470 +------- synapse/rest/media/v1/config_resource.py | 41 - synapse/rest/media/v1/download_resource.py | 76 -- synapse/rest/media/v1/filepath.py | 410 ------- synapse/rest/media/v1/media_repository.py | 1112 ------------------ synapse/rest/media/v1/media_storage.py | 365 +----- synapse/rest/media/v1/oembed.py | 265 ----- synapse/rest/media/v1/preview_html.py | 501 -------- synapse/rest/media/v1/preview_url_resource.py | 871 -------------- synapse/rest/media/v1/storage_provider.py | 172 +-- synapse/rest/media/v1/thumbnail_resource.py | 555 --------- synapse/rest/media/v1/thumbnailer.py | 221 ---- synapse/rest/media/v1/upload_resource.py | 108 -- synapse/server.py | 6 +- tests/media/__init__.py | 13 + tests/media/test_base.py | 38 + tests/media/test_filepath.py | 595 ++++++++++ tests/media/test_html_preview.py | 542 +++++++++ tests/media/test_media_storage.py | 792 +++++++++++++ tests/media/test_oembed.py | 162 +++ tests/rest/admin/test_media.py | 2 +- tests/rest/admin/test_user.py | 2 +- tests/rest/media/test_url_preview.py | 1234 ++++++++++++++++++++ tests/rest/media/v1/__init__.py | 13 - tests/rest/media/v1/test_base.py | 38 - tests/rest/media/v1/test_filepath.py | 595 ---------- tests/rest/media/v1/test_html_preview.py | 542 --------- tests/rest/media/v1/test_media_storage.py | 792 ------------- tests/rest/media/v1/test_oembed.py | 162 --- tests/rest/media/v1/test_url_preview.py | 1234 -------------------- 48 files changed, 8612 insertions(+), 8545 deletions(-) create mode 100644 changelog.d/15146.misc create mode 100644 synapse/media/_base.py create mode 100644 synapse/media/filepath.py create mode 100644 synapse/media/media_repository.py create mode 100644 synapse/media/media_storage.py create mode 100644 synapse/media/oembed.py create mode 100644 synapse/media/preview_html.py create mode 100644 synapse/media/storage_provider.py create mode 100644 synapse/media/thumbnailer.py create mode 100644 synapse/rest/media/config_resource.py create mode 100644 synapse/rest/media/download_resource.py create mode 100644 synapse/rest/media/media_repository_resource.py create mode 100644 synapse/rest/media/preview_url_resource.py create mode 100644 synapse/rest/media/thumbnail_resource.py create mode 100644 synapse/rest/media/upload_resource.py delete mode 100644 synapse/rest/media/v1/config_resource.py delete mode 100644 synapse/rest/media/v1/download_resource.py delete mode 100644 synapse/rest/media/v1/filepath.py delete mode 100644 synapse/rest/media/v1/media_repository.py delete mode 100644 synapse/rest/media/v1/oembed.py delete mode 100644 synapse/rest/media/v1/preview_html.py delete mode 100644 synapse/rest/media/v1/preview_url_resource.py delete mode 100644 synapse/rest/media/v1/thumbnail_resource.py delete mode 100644 synapse/rest/media/v1/thumbnailer.py delete mode 100644 synapse/rest/media/v1/upload_resource.py create mode 100644 tests/media/__init__.py create mode 100644 tests/media/test_base.py create mode 100644 tests/media/test_filepath.py create mode 100644 tests/media/test_html_preview.py create mode 100644 tests/media/test_media_storage.py create mode 100644 tests/media/test_oembed.py create mode 100644 tests/rest/media/test_url_preview.py delete mode 100644 tests/rest/media/v1/__init__.py delete mode 100644 tests/rest/media/v1/test_base.py delete mode 100644 tests/rest/media/v1/test_filepath.py delete mode 100644 tests/rest/media/v1/test_html_preview.py delete mode 100644 tests/rest/media/v1/test_media_storage.py delete mode 100644 tests/rest/media/v1/test_oembed.py delete mode 100644 tests/rest/media/v1/test_url_preview.py (limited to 'synapse') diff --git a/changelog.d/15146.misc b/changelog.d/15146.misc new file mode 100644 index 0000000000..8de5f95239 --- /dev/null +++ b/changelog.d/15146.misc @@ -0,0 +1 @@ +Refactor the media modules. diff --git a/synapse/_scripts/move_remote_media_to_new_store.py b/synapse/_scripts/move_remote_media_to_new_store.py index 819afaaca6..0dd36bee20 100755 --- a/synapse/_scripts/move_remote_media_to_new_store.py +++ b/synapse/_scripts/move_remote_media_to_new_store.py @@ -37,7 +37,7 @@ import os import shutil import sys -from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.media.filepath import MediaFilePaths logger = logging.getLogger() diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2da40c09f0..ecb3edbe3a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -178,11 +178,13 @@ class ContentRepositoryConfig(Config): for i, provider_config in enumerate(storage_providers): # We special case the module "file_system" so as not to need to # expose FileStorageProviderBackend - if provider_config["module"] == "file_system": - provider_config["module"] = ( - "synapse.rest.media.v1.storage_provider" - ".FileStorageProviderBackend" - ) + if ( + provider_config["module"] == "file_system" + or provider_config["module"] == "synapse.rest.media.v1.storage_provider" + ): + provider_config[ + "module" + ] = "synapse.media.storage_provider.FileStorageProviderBackend" provider_class, parsed_config = load_module( provider_config, ("media_storage_providers", "" % i) diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 623a2c71ea..765c15bb51 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -33,8 +33,8 @@ from typing_extensions import Literal import synapse from synapse.api.errors import Codes from synapse.logging.opentracing import trace -from synapse.rest.media.v1._base import FileInfo -from synapse.rest.media.v1.media_storage import ReadableFileWrapper +from synapse.media._base import FileInfo +from synapse.media.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour from synapse.types import JsonDict, RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable diff --git a/synapse/media/_base.py b/synapse/media/_base.py new file mode 100644 index 0000000000..ef8334ae25 --- /dev/null +++ b/synapse/media/_base.py @@ -0,0 +1,479 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import urllib +from abc import ABC, abstractmethod +from types import TracebackType +from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type + +import attr + +from twisted.internet.interfaces import IConsumer +from twisted.protocols.basic import FileSender +from twisted.web.server import Request + +from synapse.api.errors import Codes, SynapseError, cs_error +from synapse.http.server import finish_request, respond_with_json +from synapse.http.site import SynapseRequest +from synapse.logging.context import make_deferred_yieldable +from synapse.util.stringutils import is_ascii, parse_and_validate_server_name + +logger = logging.getLogger(__name__) + +# list all text content types that will have the charset default to UTF-8 when +# none is given +TEXT_CONTENT_TYPES = [ + "text/css", + "text/csv", + "text/html", + "text/calendar", + "text/plain", + "text/javascript", + "application/json", + "application/ld+json", + "application/rtf", + "image/svg+xml", + "text/xml", +] + + +def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: + """Parses the server name, media ID and optional file name from the request URI + + Also performs some rough validation on the server name. + + Args: + request: The `Request`. + + Returns: + A tuple containing the parsed server name, media ID and optional file name. + + Raises: + SynapseError(404): if parsing or validation fail for any reason + """ + try: + # The type on postpath seems incorrect in Twisted 21.2.0. + postpath: List[bytes] = request.postpath # type: ignore + assert postpath + + # This allows users to append e.g. /test.png to the URL. Useful for + # clients that parse the URL to see content type. + server_name_bytes, media_id_bytes = postpath[:2] + server_name = server_name_bytes.decode("utf-8") + media_id = media_id_bytes.decode("utf8") + + # Validate the server name, raising if invalid + parse_and_validate_server_name(server_name) + + file_name = None + if len(postpath) > 2: + try: + file_name = urllib.parse.unquote(postpath[-1].decode("utf-8")) + except UnicodeDecodeError: + pass + return server_name, media_id, file_name + except Exception: + raise SynapseError( + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN + ) + + +def respond_404(request: SynapseRequest) -> None: + respond_with_json( + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, + ) + + +async def respond_with_file( + request: SynapseRequest, + media_type: str, + file_path: str, + file_size: Optional[int] = None, + upload_name: Optional[str] = None, +) -> None: + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + if file_size is None: + stat = os.stat(file_path) + file_size = stat.st_size + + add_file_headers(request, media_type, file_size, upload_name) + + with open(file_path, "rb") as f: + await make_deferred_yieldable(FileSender().beginFileTransfer(f, request)) + + finish_request(request) + else: + respond_404(request) + + +def add_file_headers( + request: Request, + media_type: str, + file_size: Optional[int], + upload_name: Optional[str], +) -> None: + """Adds the correct response headers in preparation for responding with the + media. + + Args: + request + media_type: The media/content type. + file_size: Size in bytes of the media, if known. + upload_name: The name of the requested file, if any. + """ + + def _quote(x: str) -> str: + return urllib.parse.quote(x.encode("utf-8")) + + # Default to a UTF-8 charset for text content types. + # ex, uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16' + if media_type.lower() in TEXT_CONTENT_TYPES: + content_type = media_type + "; charset=UTF-8" + else: + content_type = media_type + + request.setHeader(b"Content-Type", content_type.encode("UTF-8")) + if upload_name: + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape character. Note that %-encoding is *not* permitted. + # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = "inline; filename=%s" % (upload_name,) + else: + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + + request.setHeader(b"Content-Disposition", disposition.encode("ascii")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + if file_size is not None: + request.setHeader(b"Content-Length", b"%d" % (file_size,)) + + # Tell web crawlers to not index, archive, or follow links in media. This + # should help to prevent things in the media repo from showing up in web + # search results. + request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex") + + +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = { + "(", + ")", + "<", + ">", + "@", + ",", + ";", + ":", + "\\", + '"', + "/", + "[", + "]", + "?", + "=", + "{", + "}", +} + + +def _can_encode_filename_as_token(x: str) -> bool: + for c in x: + # from RFC2616: + # + # token = 1* + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = + # + # CTL = + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + +async def respond_with_responder( + request: SynapseRequest, + responder: "Optional[Responder]", + media_type: str, + file_size: Optional[int], + upload_name: Optional[str] = None, +) -> None: + """Responds to the request with given responder. If responder is None then + returns 404. + + Args: + request + responder + media_type: The media/content type. + file_size: Size in bytes of the media. If not known it should be None + upload_name: The name of the requested file, if any. + """ + if not responder: + respond_404(request) + return + + # If we have a responder we *must* use it as a context manager. + with responder: + if request._disconnected: + logger.warning( + "Not sending response to request %s, already disconnected.", request + ) + return + + logger.debug("Responding to media request with responder %s", responder) + add_file_headers(request, media_type, file_size, upload_name) + try: + await responder.write_to_consumer(request) + except Exception as e: + # The majority of the time this will be due to the client having gone + # away. Unfortunately, Twisted simply throws a generic exception at us + # in that case. + logger.warning("Failed to write to consumer: %s %s", type(e), e) + + # Unregister the producer, if it has one, so Twisted doesn't complain + if request.producer: + request.unregisterProducer() + + finish_request(request) + + +class Responder(ABC): + """Represents a response that can be streamed to the requester. + + Responder is a context manager which *must* be used, so that any resources + held can be cleaned up. + """ + + @abstractmethod + def write_to_consumer(self, consumer: IConsumer) -> Awaitable: + """Stream response into consumer + + Args: + consumer: The consumer to stream into. + + Returns: + Resolves once the response has finished being written + """ + raise NotImplementedError() + + def __enter__(self) -> None: # noqa: B027 + pass + + def __exit__( # noqa: B027 + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + pass + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThumbnailInfo: + """Details about a generated thumbnail.""" + + width: int + height: int + method: str + # Content type of thumbnail, e.g. image/png + type: str + # The size of the media file, in bytes. + length: Optional[int] = None + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class FileInfo: + """Details about a requested/uploaded file.""" + + # The server name where the media originated from, or None if local. + server_name: Optional[str] + # The local ID of the file. For local files this is the same as the media_id + file_id: str + # If the file is for the url preview cache + url_cache: bool = False + # Whether the file is a thumbnail or not. + thumbnail: Optional[ThumbnailInfo] = None + + # The below properties exist to maintain compatibility with third-party modules. + @property + def thumbnail_width(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.width + + @property + def thumbnail_height(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.height + + @property + def thumbnail_method(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.method + + @property + def thumbnail_type(self) -> Optional[str]: + if not self.thumbnail: + return None + return self.thumbnail.type + + @property + def thumbnail_length(self) -> Optional[int]: + if not self.thumbnail: + return None + return self.thumbnail.length + + +def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]: + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers: The HTTP request headers. + + Returns: + The filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b""]) + + # No header, bail out. + if not content_disposition[0]: + return None + + _, params = _parse_header(content_disposition[0]) + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get(b"filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode("ascii"), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get(b"filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii.decode("ascii") + + # This may be None here, indicating we did not find a matching name. + return upload_name + + +def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]: + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + line: header to be parsed + + Returns: + The main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b";" + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b"=") + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1 :].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s: bytes) -> Generator[bytes, None, None]: + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s: header to be parsed + + Returns: + The split input + """ + while s[:1] == b";": + s = s[1:] + + # look for the next ; + end = s.find(b";") + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b";", end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] diff --git a/synapse/media/filepath.py b/synapse/media/filepath.py new file mode 100644 index 0000000000..1f6441c412 --- /dev/null +++ b/synapse/media/filepath.py @@ -0,0 +1,410 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2020-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os +import re +import string +from typing import Any, Callable, List, TypeVar, Union, cast + +NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") + + +F = TypeVar("F", bound=Callable[..., str]) + + +def _wrap_in_base_path(func: F) -> F: + """Takes a function that returns a relative path and turns it into an + absolute path based on the location of the primary media store + """ + + @functools.wraps(func) + def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str: + path = func(self, *args, **kwargs) + return os.path.join(self.base_path, path) + + return cast(F, _wrapped) + + +GetPathMethod = TypeVar( + "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]] +) + + +def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]: + """Wraps a path-returning method to check that the returned path(s) do not escape + the media store directory. + + The path-returning method may return either a single path, or a list of paths. + + The check is not expected to ever fail, unless `func` is missing a call to + `_validate_path_component`, or `_validate_path_component` is buggy. + + Args: + relative: A boolean indicating whether the wrapped method returns paths relative + to the media store directory. + + Returns: + A method which will wrap a path-returning method, adding a check to ensure that + the returned path(s) lie within the media store directory. The check will raise + a `ValueError` if it fails. + """ + + def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod: + @functools.wraps(func) + def _wrapped( + self: "MediaFilePaths", *args: Any, **kwargs: Any + ) -> Union[str, List[str]]: + path_or_paths = func(self, *args, **kwargs) + + if isinstance(path_or_paths, list): + paths_to_check = path_or_paths + else: + paths_to_check = [path_or_paths] + + for path in paths_to_check: + # Construct the path that will ultimately be used. + # We cannot guess whether `path` is relative to the media store + # directory, since the media store directory may itself be a relative + # path. + if relative: + path = os.path.join(self.base_path, path) + normalized_path = os.path.normpath(path) + + # Now that `normpath` has eliminated `../`s and `./`s from the path, + # `os.path.commonpath` can be used to check whether it lies within the + # media store directory. + if ( + os.path.commonpath([normalized_path, self.normalized_base_path]) + != self.normalized_base_path + ): + # The path resolves to outside the media store directory, + # or `self.base_path` is `.`, which is an unlikely configuration. + raise ValueError(f"Invalid media store path: {path!r}") + + # Note that `os.path.normpath`/`abspath` has a subtle caveat: + # `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a + # different path if `a/b/c` is a symlink. That is, the check above is + # not perfect and may allow a certain restricted subset of untrustworthy + # paths through. Since the check above is secondary to the main + # `_validate_path_component` checks, it's less important for it to be + # perfect. + # + # As an alternative, `os.path.realpath` will resolve symlinks, but + # proves problematic if there are symlinks inside the media store. + # eg. if `url_store/` is symlinked to elsewhere, its canonical path + # won't match that of the main media store directory. + + return path_or_paths + + return cast(GetPathMethod, _wrapped) + + return _wrap_with_jail_check_inner + + +ALLOWED_CHARACTERS = set( + string.ascii_letters + + string.digits + + "_-" + + ".[]:" # Domain names, IPv6 addresses and ports in server names +) +FORBIDDEN_NAMES = { + "", + os.path.curdir, # "." for the current platform + os.path.pardir, # ".." for the current platform +} + + +def _validate_path_component(name: str) -> str: + """Checks that the given string can be safely used as a path component + + Args: + name: The path component to check. + + Returns: + The path component if valid. + + Raises: + ValueError: If `name` cannot be safely used as a path component. + """ + if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES: + raise ValueError(f"Invalid path component: {name!r}") + + return name + + +class MediaFilePaths: + """Describes where files are stored on disk. + + Most of the functions have a `*_rel` variant which returns a file path that + is relative to the base media store path. This is mainly used when we want + to write to the backup media store (when one is configured) + """ + + def __init__(self, primary_base_path: str): + self.base_path = primary_base_path + self.normalized_base_path = os.path.normpath(self.base_path) + + # Refuse to initialize if paths cannot be validated correctly for the current + # platform. + assert os.path.sep not in ALLOWED_CHARACTERS + assert os.path.altsep not in ALLOWED_CHARACTERS + # On Windows, paths have all sorts of weirdness which `_validate_path_component` + # does not consider. In any case, the remote media store can't work correctly + # for certain homeservers there, since ":"s aren't allowed in paths. + assert os.name == "posix" + + @_wrap_with_jail_check(relative=True) + def local_media_filepath_rel(self, media_id: str) -> str: + return os.path.join( + "local_content", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) + + local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + + @_wrap_with_jail_check(relative=True) + def local_media_thumbnail_rel( + self, media_id: str, width: int, height: int, content_type: str, method: str + ) -> str: + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) + return os.path.join( + "local_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), + ) + + local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + + @_wrap_with_jail_check(relative=False) + def local_media_thumbnail_dir(self, media_id: str) -> str: + """ + Retrieve the local store path of thumbnails of a given media_id + + Args: + media_id: The media ID to query. + Returns: + Path of local_thumbnails from media_id + """ + return os.path.join( + self.base_path, + "local_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) + + @_wrap_with_jail_check(relative=True) + def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: + return os.path.join( + "remote_content", + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + ) + + remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + + @_wrap_with_jail_check(relative=True) + def remote_media_thumbnail_rel( + self, + server_name: str, + file_id: str, + width: int, + height: int, + content_type: str, + method: str, + ) -> str: + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) + return os.path.join( + "remote_thumbnail", + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), + ) + + remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) + + # Legacy path that was used to store thumbnails previously. + # Should be removed after some time, when most of the thumbnails are stored + # using the new path. + @_wrap_with_jail_check(relative=True) + def remote_media_thumbnail_rel_legacy( + self, server_name: str, file_id: str, width: int, height: int, content_type: str + ) -> str: + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) + return os.path.join( + "remote_thumbnail", + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + _validate_path_component(file_name), + ) + + @_wrap_with_jail_check(relative=False) + def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: + return os.path.join( + self.base_path, + "remote_thumbnail", + _validate_path_component(server_name), + _validate_path_component(file_id[0:2]), + _validate_path_component(file_id[2:4]), + _validate_path_component(file_id[4:]), + ) + + @_wrap_with_jail_check(relative=True) + def url_cache_filepath_rel(self, media_id: str) -> str: + if NEW_FORMAT_ID_RE.match(media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + return os.path.join( + "url_cache", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ) + else: + return os.path.join( + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) + + url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + + @_wrap_with_jail_check(relative=False) + def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: + "The dirs to try and remove if we delete the media_id file" + if NEW_FORMAT_ID_RE.match(media_id): + return [ + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[:10]) + ) + ] + else: + return [ + os.path.join( + self.base_path, + "url_cache", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, "url_cache", _validate_path_component(media_id[0:2]) + ), + ] + + @_wrap_with_jail_check(relative=True) + def url_cache_thumbnail_rel( + self, media_id: str, width: int, height: int, content_type: str, method: str + ) -> str: + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) + + if NEW_FORMAT_ID_RE.match(media_id): + return os.path.join( + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + _validate_path_component(file_name), + ) + else: + return os.path.join( + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + _validate_path_component(file_name), + ) + + url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) + + @_wrap_with_jail_check(relative=True) + def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + + if NEW_FORMAT_ID_RE.match(media_id): + return os.path.join( + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ) + else: + return os.path.join( + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ) + + url_cache_thumbnail_directory = _wrap_in_base_path( + url_cache_thumbnail_directory_rel + ) + + @_wrap_with_jail_check(relative=False) + def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: + "The dirs to try and remove if we delete the media_id thumbnails" + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + if NEW_FORMAT_ID_RE.match(media_id): + return [ + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + _validate_path_component(media_id[11:]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[:10]), + ), + ] + else: + return [ + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + _validate_path_component(media_id[4:]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + _validate_path_component(media_id[2:4]), + ), + os.path.join( + self.base_path, + "url_cache_thumbnails", + _validate_path_component(media_id[0:2]), + ), + ] diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py new file mode 100644 index 0000000000..b81e3c2b0c --- /dev/null +++ b/synapse/media/media_repository.py @@ -0,0 +1,1038 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import errno +import logging +import os +import shutil +from io import BytesIO +from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple + +from matrix_common.types.mxc_uri import MXCUri + +import twisted.internet.error +import twisted.web.http +from twisted.internet.defer import Deferred + +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + NotFoundError, + RequestSendFailed, + SynapseError, +) +from synapse.config.repository import ThumbnailRequirement +from synapse.http.site import SynapseRequest +from synapse.logging.context import defer_to_thread +from synapse.media._base import ( + FileInfo, + Responder, + ThumbnailInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) +from synapse.media.filepath import MediaFilePaths +from synapse.media.media_storage import MediaStorage +from synapse.media.storage_provider import StorageProviderWrapper +from synapse.media.thumbnailer import Thumbnailer, ThumbnailError +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import UserID +from synapse.util.async_helpers import Linearizer +from synapse.util.retryutils import NotRetryingDestination +from synapse.util.stringutils import random_string + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + +# How often to run the background job to update the "recently accessed" +# attribute of local and remote media. +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute +# How often to run the background job to check for local and remote media +# that should be purged according to the configured media retention settings. +MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000 # 1 hour + + +class MediaRepository: + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.auth = hs.get_auth() + self.client = hs.get_federation_http_client() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastores().main + self.max_upload_size = hs.config.media.max_upload_size + self.max_image_pixels = hs.config.media.max_image_pixels + + Thumbnailer.set_limits(self.max_image_pixels) + + self.primary_base_path: str = hs.config.media.media_store_path + self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path) + + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails + self.thumbnail_requirements = hs.config.media.thumbnail_requirements + + self.remote_media_linearizer = Linearizer(name="media_remote") + + self.recently_accessed_remotes: Set[Tuple[str, str]] = set() + self.recently_accessed_locals: Set[str] = set() + + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) + + # List of StorageProviders where we should search for media and + # potentially upload to. + storage_providers = [] + + for ( + clz, + provider_config, + wrapper_config, + ) in hs.config.media.media_storage_providers: + backend = clz(hs, provider_config) + provider = StorageProviderWrapper( + backend, + store_local=wrapper_config.store_local, + store_remote=wrapper_config.store_remote, + store_synchronous=wrapper_config.store_synchronous, + ) + storage_providers.append(provider) + + self.media_storage = MediaStorage( + self.hs, self.primary_base_path, self.filepaths, storage_providers + ) + + self.clock.looping_call( + self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS + ) + + # Media retention configuration options + self._media_retention_local_media_lifetime_ms = ( + hs.config.media.media_retention_local_media_lifetime_ms + ) + self._media_retention_remote_media_lifetime_ms = ( + hs.config.media.media_retention_remote_media_lifetime_ms + ) + + # Check whether local or remote media retention is configured + if ( + hs.config.media.media_retention_local_media_lifetime_ms is not None + or hs.config.media.media_retention_remote_media_lifetime_ms is not None + ): + # Run the background job to apply media retention rules routinely, + # with the duration between runs dictated by the homeserver config. + self.clock.looping_call( + self._start_apply_media_retention_rules, + MEDIA_RETENTION_CHECK_PERIOD_MS, + ) + + def _start_update_recently_accessed(self) -> Deferred: + return run_as_background_process( + "update_recently_accessed_media", self._update_recently_accessed + ) + + def _start_apply_media_retention_rules(self) -> Deferred: + return run_as_background_process( + "apply_media_retention_rules", self._apply_media_retention_rules + ) + + async def _update_recently_accessed(self) -> None: + remote_media = self.recently_accessed_remotes + self.recently_accessed_remotes = set() + + local_media = self.recently_accessed_locals + self.recently_accessed_locals = set() + + await self.store.update_cached_last_access_time( + local_media, remote_media, self.clock.time_msec() + ) + + def mark_recently_accessed(self, server_name: Optional[str], media_id: str) -> None: + """Mark the given media as recently accessed. + + Args: + server_name: Origin server of media, or None if local + media_id: The media ID of the content + """ + if server_name: + self.recently_accessed_remotes.add((server_name, media_id)) + else: + self.recently_accessed_locals.add(media_id) + + async def create_content( + self, + media_type: str, + upload_name: Optional[str], + content: IO, + content_length: int, + auth_user: UserID, + ) -> MXCUri: + """Store uploaded content for a local user and return the mxc URL + + Args: + media_type: The content type of the file. + upload_name: The name of the file, if provided. + content: A file like object that is the content to store + content_length: The length of the content + auth_user: The user_id of the uploader + + Returns: + The mxc url of the stored content + """ + + media_id = random_string(24) + + file_info = FileInfo(server_name=None, file_id=media_id) + + fname = await self.media_storage.store_file(content, file_info) + + logger.info("Stored local media in file %r", fname) + + await self.store.store_local_media( + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=content_length, + user_id=auth_user, + ) + + await self._generate_thumbnails(None, media_id, media_id, media_type) + + return MXCUri(self.server_name, media_id) + + async def get_local_media( + self, request: SynapseRequest, media_id: str, name: Optional[str] + ) -> None: + """Responds to requests for local media, if exists, or returns 404. + + Args: + request: The incoming request. + media_id: The media ID of the content. (This is the same as + the file_id for local content.) + name: Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Returns: + Resolves once a response has successfully been written to request + """ + media_info = await self.store.get_local_media(media_id) + if not media_info or media_info["quarantined_by"]: + respond_404(request) + return + + self.mark_recently_accessed(None, media_id) + + media_type = media_info["media_type"] + if not media_type: + media_type = "application/octet-stream" + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + url_cache = media_info["url_cache"] + + file_info = FileInfo(None, media_id, url_cache=bool(url_cache)) + + responder = await self.media_storage.fetch_media(file_info) + await respond_with_responder( + request, responder, media_type, media_length, upload_name + ) + + async def get_remote_media( + self, + request: SynapseRequest, + server_name: str, + media_id: str, + name: Optional[str], + ) -> None: + """Respond to requests for remote media. + + Args: + request: The incoming request. + server_name: Remote server_name where the media originated. + media_id: The media ID of the content (as defined by the remote server). + name: Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Returns: + Resolves once a response has successfully been written to request + """ + if ( + self.federation_domain_whitelist is not None + and server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + + self.mark_recently_accessed(server_name, media_id) + + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + async with self.remote_media_linearizer.queue(key): + responder, media_info = await self._get_remote_media_impl( + server_name, media_id + ) + + # We deliberately stream the file outside the lock + if responder: + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + await respond_with_responder( + request, responder, media_type, media_length, upload_name + ) + else: + respond_404(request) + + async def get_remote_media_info(self, server_name: str, media_id: str) -> dict: + """Gets the media info associated with the remote file, downloading + if necessary. + + Args: + server_name: Remote server_name where the media originated. + media_id: The media ID of the content (as defined by the remote server). + + Returns: + The media info of the file + """ + if ( + self.federation_domain_whitelist is not None + and server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + async with self.remote_media_linearizer.queue(key): + responder, media_info = await self._get_remote_media_impl( + server_name, media_id + ) + + # Ensure we actually use the responder so that it releases resources + if responder: + with responder: + pass + + return media_info + + async def _get_remote_media_impl( + self, server_name: str, media_id: str + ) -> Tuple[Optional[Responder], dict]: + """Looks for media in local cache, if not there then attempt to + download from remote server. + + Args: + server_name: Remote server_name where the media originated. + media_id: The media ID of the content (as defined by the + remote server). + + Returns: + A tuple of responder and the media info of the file. + """ + media_info = await self.store.get_cached_remote_media(server_name, media_id) + + # file_id is the ID we use to track the file locally. If we've already + # seen the file then reuse the existing ID, otherwise generate a new + # one. + + # If we have an entry in the DB, try and look for it + if media_info: + file_id = media_info["filesystem_id"] + file_info = FileInfo(server_name, file_id) + + if media_info["quarantined_by"]: + logger.info("Media is quarantined") + raise NotFoundError() + + if not media_info["media_type"]: + media_info["media_type"] = "application/octet-stream" + + responder = await self.media_storage.fetch_media(file_info) + if responder: + return responder, media_info + + # Failed to find the file anywhere, lets download it. + + try: + media_info = await self._download_remote_file( + server_name, + media_id, + ) + except SynapseError: + raise + except Exception as e: + # An exception may be because we downloaded media in another + # process, so let's check if we magically have the media. + media_info = await self.store.get_cached_remote_media(server_name, media_id) + if not media_info: + raise e + + file_id = media_info["filesystem_id"] + if not media_info["media_type"]: + media_info["media_type"] = "application/octet-stream" + file_info = FileInfo(server_name, file_id) + + # We generate thumbnails even if another process downloaded the media + # as a) it's conceivable that the other download request dies before it + # generates thumbnails, but mainly b) we want to be sure the thumbnails + # have finished being generated before responding to the client, + # otherwise they'll request thumbnails and get a 404 if they're not + # ready yet. + await self._generate_thumbnails( + server_name, media_id, file_id, media_info["media_type"] + ) + + responder = await self.media_storage.fetch_media(file_info) + return responder, media_info + + async def _download_remote_file( + self, + server_name: str, + media_id: str, + ) -> dict: + """Attempt to download the remote file from the given server name, + using the given file_id as the local id. + + Args: + server_name: Originating server + media_id: The media ID of the content (as defined by the + remote server). This is different than the file_id, which is + locally generated. + file_id: Local file ID + + Returns: + The media info of the file. + """ + + file_id = random_string(24) + + file_info = FileInfo(server_name=server_name, file_id=file_id) + + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + request_path = "/".join( + ("/_matrix/media/r0/download", server_name, media_id) + ) + try: + length, headers = await self.client.get_file( + server_name, + request_path, + output_stream=f, + max_size=self.max_upload_size, + args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false" + }, + ) + except RequestSendFailed as e: + logger.warning( + "Request failed fetching remote media %s/%s: %r", + server_name, + media_id, + e, + ) + raise SynapseError(502, "Failed to fetch remote media") + + except HttpResponseException as e: + logger.warning( + "HTTP error fetching remote media %s/%s: %s", + server_name, + media_id, + e.response, + ) + if e.code == twisted.web.http.NOT_FOUND: + raise e.to_synapse_error() + raise SynapseError(502, "Failed to fetch remote media") + + except SynapseError: + logger.warning( + "Failed to fetch remote media %s/%s", server_name, media_id + ) + raise + except NotRetryingDestination: + logger.warning("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") + except Exception: + logger.exception( + "Failed to fetch remote media %s/%s", server_name, media_id + ) + raise SynapseError(502, "Failed to fetch remote media") + + await finish() + + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" + upload_name = get_filename_from_headers(headers) + time_now_ms = self.clock.time_msec() + + # Multiple remote media download requests can race (when using + # multiple media repos), so this may throw a violation constraint + # exception. If it does we'll delete the newly downloaded file from + # disk (as we're in the ctx manager). + # + # However: we've already called `finish()` so we may have also + # written to the storage providers. This is preferable to the + # alternative where we call `finish()` *after* this, where we could + # end up having an entry in the DB but fail to write the files to + # the storage providers. + await self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) + + logger.info("Stored remote media in file %r", fname) + + media_info = { + "media_type": media_type, + "media_length": length, + "upload_name": upload_name, + "created_ts": time_now_ms, + "filesystem_id": file_id, + } + + return media_info + + def _get_thumbnail_requirements( + self, media_type: str + ) -> Tuple[ThumbnailRequirement, ...]: + scpos = media_type.find(";") + if scpos > 0: + media_type = media_type[:scpos] + return self.thumbnail_requirements.get(media_type, ()) + + def _generate_thumbnail( + self, + thumbnailer: Thumbnailer, + t_width: int, + t_height: int, + t_method: str, + t_type: str, + ) -> Optional[BytesIO]: + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, + m_height, + self.max_image_pixels, + ) + return None + + if thumbnailer.transpose_method is not None: + m_width, m_height = thumbnailer.transpose() + + if t_method == "crop": + return thumbnailer.crop(t_width, t_height, t_type) + elif t_method == "scale": + t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + return thumbnailer.scale(t_width, t_height, t_type) + + return None + + async def generate_local_exact_thumbnail( + self, + media_id: str, + t_width: int, + t_height: int, + t_method: str, + t_type: str, + url_cache: bool, + ) -> Optional[str]: + input_path = await self.media_storage.ensure_media_is_in_local_cache( + FileInfo(None, media_id, url_cache=url_cache) + ) + + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError as e: + logger.warning( + "Unable to generate a thumbnail for local media %s using a method of %s and type of %s: %s", + media_id, + t_method, + t_type, + e, + ) + return None + + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) + + if t_byte_source: + try: + file_info = FileInfo( + server_name=None, + file_id=media_id, + url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), + ) + + output_path = await self.media_storage.store_file( + t_byte_source, file_info + ) + finally: + t_byte_source.close() + + logger.info("Stored thumbnail in file %r", output_path) + + t_len = os.path.getsize(output_path) + + await self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + return output_path + + # Could not generate thumbnail. + return None + + async def generate_remote_exact_thumbnail( + self, + server_name: str, + file_id: str, + media_id: str, + t_width: int, + t_height: int, + t_method: str, + t_type: str, + ) -> Optional[str]: + input_path = await self.media_storage.ensure_media_is_in_local_cache( + FileInfo(server_name, file_id) + ) + + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError as e: + logger.warning( + "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s: %s", + media_id, + server_name, + t_method, + t_type, + e, + ) + return None + + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) + + if t_byte_source: + try: + file_info = FileInfo( + server_name=server_name, + file_id=file_id, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), + ) + + output_path = await self.media_storage.store_file( + t_byte_source, file_info + ) + finally: + t_byte_source.close() + + logger.info("Stored thumbnail in file %r", output_path) + + t_len = os.path.getsize(output_path) + + await self.store.store_remote_media_thumbnail( + server_name, + media_id, + file_id, + t_width, + t_height, + t_type, + t_method, + t_len, + ) + + return output_path + + # Could not generate thumbnail. + return None + + async def _generate_thumbnails( + self, + server_name: Optional[str], + media_id: str, + file_id: str, + media_type: str, + url_cache: bool = False, + ) -> Optional[dict]: + """Generate and store thumbnails for an image. + + Args: + server_name: The server name if remote media, else None if local + media_id: The media ID of the content. (This is the same as + the file_id for local content) + file_id: Local file ID + media_type: The content type of the file + url_cache: If we are thumbnailing images downloaded for the URL cache, + used exclusively by the url previewer + + Returns: + Dict with "width" and "height" keys of original image or None if the + media cannot be thumbnailed. + """ + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return None + + input_path = await self.media_storage.ensure_media_is_in_local_cache( + FileInfo(server_name, file_id, url_cache=url_cache) + ) + + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError as e: + logger.warning( + "Unable to generate thumbnails for remote media %s from %s of type %s: %s", + media_id, + server_name, + media_type, + e, + ) + return None + + with thumbnailer: + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, + m_height, + self.max_image_pixels, + ) + return None + + if thumbnailer.transpose_method is not None: + m_width, m_height = await defer_to_thread( + self.hs.get_reactor(), thumbnailer.transpose + ) + + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails: Dict[Tuple[int, int, str], str] = {} + for requirement in requirements: + if requirement.method == "crop": + thumbnails.setdefault( + (requirement.width, requirement.height, requirement.media_type), + requirement.method, + ) + elif requirement.method == "scale": + t_width, t_height = thumbnailer.aspect( + requirement.width, requirement.height + ) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[ + (t_width, t_height, requirement.media_type) + ] = requirement.method + + # Now we generate the thumbnails for each dimension, store it + for (t_width, t_height, t_type), t_method in thumbnails.items(): + # Generate the thumbnail + if t_method == "crop": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.crop, + t_width, + t_height, + t_type, + ) + elif t_method == "scale": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.scale, + t_width, + t_height, + t_type, + ) + else: + logger.error("Unrecognized method: %r", t_method) + continue + + if not t_byte_source: + continue + + file_info = FileInfo( + server_name=server_name, + file_id=file_id, + url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), + ) + + with self.media_storage.store_into_file(file_info) as ( + f, + fname, + finish, + ): + try: + await self.media_storage.write_to_file(t_byte_source, f) + await finish() + finally: + t_byte_source.close() + + t_len = os.path.getsize(fname) + + # Write to database + if server_name: + # Multiple remote media download requests can race (when + # using multiple media repos), so this may throw a violation + # constraint exception. If it does we'll delete the newly + # generated thumbnail from disk (as we're in the ctx + # manager). + # + # However: we've already called `finish()` so we may have + # also written to the storage providers. This is preferable + # to the alternative where we call `finish()` *after* this, + # where we could end up having an entry in the DB but fail + # to write the files to the storage providers. + try: + await self.store.store_remote_media_thumbnail( + server_name, + media_id, + file_id, + t_width, + t_height, + t_type, + t_method, + t_len, + ) + except Exception as e: + thumbnail_exists = ( + await self.store.get_remote_media_thumbnail( + server_name, + media_id, + t_width, + t_height, + t_type, + ) + ) + if not thumbnail_exists: + raise e + else: + await self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + return {"width": m_width, "height": m_height} + + async def _apply_media_retention_rules(self) -> None: + """ + Purge old local and remote media according to the media retention rules + defined in the homeserver config. + """ + # Purge remote media + if self._media_retention_remote_media_lifetime_ms is not None: + # Calculate a threshold timestamp derived from the configured lifetime. Any + # media that has not been accessed since this timestamp will be removed. + remote_media_threshold_timestamp_ms = ( + self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms + ) + + logger.info( + "Purging remote media last accessed before" + f" {remote_media_threshold_timestamp_ms}" + ) + + await self.delete_old_remote_media( + before_ts=remote_media_threshold_timestamp_ms + ) + + # And now do the same for local media + if self._media_retention_local_media_lifetime_ms is not None: + # This works the same as the remote media threshold + local_media_threshold_timestamp_ms = ( + self.clock.time_msec() - self._media_retention_local_media_lifetime_ms + ) + + logger.info( + "Purging local media last accessed before" + f" {local_media_threshold_timestamp_ms}" + ) + + await self.delete_old_local_media( + before_ts=local_media_threshold_timestamp_ms, + keep_profiles=True, + delete_quarantined_media=False, + delete_protected_media=False, + ) + + async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: + old_media = await self.store.get_remote_media_ids( + before_ts, include_quarantined_media=False + ) + + deleted = 0 + + for media in old_media: + origin = media["media_origin"] + media_id = media["media_id"] + file_id = media["filesystem_id"] + key = (origin, media_id) + + logger.info("Deleting: %r", key) + + # TODO: Should we delete from the backup store + + async with self.remote_media_linearizer.queue(key): + full_path = self.filepaths.remote_media_filepath(origin, file_id) + try: + os.remove(full_path) + except OSError as e: + logger.warning("Failed to remove file: %r", full_path) + if e.errno == errno.ENOENT: + pass + else: + continue + + thumbnail_dir = self.filepaths.remote_media_thumbnail_dir( + origin, file_id + ) + shutil.rmtree(thumbnail_dir, ignore_errors=True) + + await self.store.delete_remote_media(origin, media_id) + deleted += 1 + + return {"deleted": deleted} + + async def delete_local_media_ids( + self, media_ids: List[str] + ) -> Tuple[List[str], int]: + """ + Delete the given local or remote media ID from this server + + Args: + media_id: The media ID to delete. + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + return await self._remove_local_media_from_disk(media_ids) + + async def delete_old_local_media( + self, + before_ts: int, + size_gt: int = 0, + keep_profiles: bool = True, + delete_quarantined_media: bool = False, + delete_protected_media: bool = False, + ) -> Tuple[List[str], int]: + """ + Delete local or remote media from this server by size and timestamp. Removes + media files, any thumbnails and cached URLs. + + Args: + before_ts: Unix timestamp in ms. + Files that were last used before this timestamp will be deleted. + size_gt: Size of the media in bytes. Files that are larger will be deleted. + keep_profiles: Switch to delete also files that are still used in image data + (e.g user profile, room avatar). If false these files will be deleted. + delete_quarantined_media: If True, media marked as quarantined will be deleted. + delete_protected_media: If True, media marked as protected will be deleted. + + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + old_media = await self.store.get_local_media_ids( + before_ts, + size_gt, + keep_profiles, + include_quarantined_media=delete_quarantined_media, + include_protected_media=delete_protected_media, + ) + return await self._remove_local_media_from_disk(old_media) + + async def _remove_local_media_from_disk( + self, media_ids: List[str] + ) -> Tuple[List[str], int]: + """ + Delete local or remote media from this server. Removes media files, + any thumbnails and cached URLs. + + Args: + media_ids: List of media_id to delete + Returns: + A tuple of (list of deleted media IDs, total deleted media IDs). + """ + removed_media = [] + for media_id in media_ids: + logger.info("Deleting media with ID '%s'", media_id) + full_path = self.filepaths.local_media_filepath(media_id) + try: + os.remove(full_path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", full_path, e) + if e.errno == errno.ENOENT: + pass + else: + continue + + thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) + shutil.rmtree(thumbnail_dir, ignore_errors=True) + + await self.store.delete_remote_media(self.server_name, media_id) + + await self.store.delete_url_cache((media_id,)) + await self.store.delete_url_cache_media((media_id,)) + + removed_media.append(media_id) + + return removed_media, len(removed_media) diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py new file mode 100644 index 0000000000..a7e22a91e1 --- /dev/null +++ b/synapse/media/media_storage.py @@ -0,0 +1,374 @@ +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import logging +import os +import shutil +from types import TracebackType +from typing import ( + IO, + TYPE_CHECKING, + Any, + Awaitable, + BinaryIO, + Callable, + Generator, + Optional, + Sequence, + Tuple, + Type, +) + +import attr + +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IConsumer +from twisted.protocols.basic import FileSender + +import synapse +from synapse.api.errors import NotFoundError +from synapse.logging.context import defer_to_thread, make_deferred_yieldable +from synapse.util import Clock +from synapse.util.file_consumer import BackgroundFileConsumer + +from ._base import FileInfo, Responder +from .filepath import MediaFilePaths + +if TYPE_CHECKING: + from synapse.media.storage_provider import StorageProvider + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class MediaStorage: + """Responsible for storing/fetching files from local sources. + + Args: + hs + local_media_directory: Base path where we store media on disk + filepaths + storage_providers: List of StorageProvider that are used to fetch and store files. + """ + + def __init__( + self, + hs: "HomeServer", + local_media_directory: str, + filepaths: MediaFilePaths, + storage_providers: Sequence["StorageProvider"], + ): + self.hs = hs + self.reactor = hs.get_reactor() + self.local_media_directory = local_media_directory + self.filepaths = filepaths + self.storage_providers = storage_providers + self.spam_checker = hs.get_spam_checker() + self.clock = hs.get_clock() + + async def store_file(self, source: IO, file_info: FileInfo) -> str: + """Write `source` to the on disk media store, and also any other + configured storage providers + + Args: + source: A file like object that should be written + file_info: Info about the file to store + + Returns: + the file path written to in the primary media store + """ + + with self.store_into_file(file_info) as (f, fname, finish_cb): + # Write to the main repository + await self.write_to_file(source, f) + await finish_cb() + + return fname + + async def write_to_file(self, source: IO, output: IO) -> None: + """Asynchronously write the `source` to `output`.""" + await defer_to_thread(self.reactor, _write_file_synchronously, source, output) + + @contextlib.contextmanager + def store_into_file( + self, file_info: FileInfo + ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]: + """Context manager used to get a file like object to write into, as + described by file_info. + + Actually yields a 3-tuple (file, fname, finish_cb), where file is a file + like object that can be written to, fname is the absolute path of file + on disk, and finish_cb is a function that returns an awaitable. + + fname can be used to read the contents from after upload, e.g. to + generate thumbnails. + + finish_cb must be called and waited on after the file has been + successfully been written to. Should not be called if there was an + error. + + Args: + file_info: Info about the file to store + + Example: + + with media_storage.store_into_file(info) as (f, fname, finish_cb): + # .. write into f ... + await finish_cb() + """ + + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + os.makedirs(dirname, exist_ok=True) + + finished_called = [False] + + try: + with open(fname, "wb") as f: + + async def finish() -> None: + # Ensure that all writes have been flushed and close the + # file. + f.flush() + f.close() + + spam_check = await self.spam_checker.check_media_file_for_spam( + ReadableFileWrapper(self.clock, fname), file_info + ) + if spam_check != synapse.module_api.NOT_SPAM: + logger.info("Blocking media due to spam checker") + # Note that we'll delete the stored media, due to the + # try/except below. The media also won't be stored in + # the DB. + # We currently ignore any additional field returned by + # the spam-check API. + raise SpamMediaException(errcode=spam_check[0]) + + for provider in self.storage_providers: + await provider.store_file(path, file_info) + + finished_called[0] = True + + yield f, fname, finish + except Exception as e: + try: + os.remove(fname) + except Exception: + pass + + raise e from None + + if not finished_called: + raise Exception("Finished callback not called") + + async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: + """Attempts to fetch media described by file_info from the local cache + and configured storage providers. + + Args: + file_info + + Returns: + Returns a Responder if the file was found, otherwise None. + """ + paths = [self._file_info_to_path(file_info)] + + # fallback for remote thumbnails with no method in the filename + if file_info.thumbnail and file_info.server_name: + paths.append( + self.filepaths.remote_media_thumbnail_rel_legacy( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + ) + ) + + for path in paths: + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + logger.debug("responding with local file %s", local_path) + return FileResponder(open(local_path, "rb")) + logger.debug("local file %s did not exist", local_path) + + for provider in self.storage_providers: + for path in paths: + res: Any = await provider.fetch(path, file_info) + if res: + logger.debug("Streaming %s from %s", path, provider) + return res + logger.debug("%s not found on %s", path, provider) + + return None + + async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: + """Ensures that the given file is in the local cache. Attempts to + download it from storage providers if it isn't. + + Args: + file_info + + Returns: + Full path to local file + """ + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + return local_path + + # Fallback for paths without method names + # Should be removed in the future + if file_info.thumbnail and file_info.server_name: + legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + ) + legacy_local_path = os.path.join(self.local_media_directory, legacy_path) + if os.path.exists(legacy_local_path): + return legacy_local_path + + dirname = os.path.dirname(local_path) + os.makedirs(dirname, exist_ok=True) + + for provider in self.storage_providers: + res: Any = await provider.fetch(path, file_info) + if res: + with res: + consumer = BackgroundFileConsumer( + open(local_path, "wb"), self.reactor + ) + await res.write_to_consumer(consumer) + await consumer.wait() + return local_path + + raise NotFoundError() + + def _file_info_to_path(self, file_info: FileInfo) -> str: + """Converts file_info into a relative path. + + The path is suitable for storing files under a directory, e.g. used to + store files on local FS under the base media repository directory. + """ + if file_info.url_cache: + if file_info.thumbnail: + return self.filepaths.url_cache_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.url_cache_filepath_rel(file_info.file_id) + + if file_info.server_name: + if file_info.thumbnail: + return self.filepaths.remote_media_thumbnail_rel( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.remote_media_filepath_rel( + file_info.server_name, file_info.file_id + ) + + if file_info.thumbnail: + return self.filepaths.local_media_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, + ) + return self.filepaths.local_media_filepath_rel(file_info.file_id) + + +def _write_file_synchronously(source: IO, dest: IO) -> None: + """Write `source` to the file like `dest` synchronously. Should be called + from a thread. + + Args: + source: A file like object that's to be written + dest: A file like object to be written to + """ + source.seek(0) # Ensure we read from the start of the file + shutil.copyfileobj(source, dest) + + +class FileResponder(Responder): + """Wraps an open file that can be sent to a request. + + Args: + open_file: A file like object to be streamed ot the client, + is closed when finished streaming. + """ + + def __init__(self, open_file: IO): + self.open_file = open_file + + def write_to_consumer(self, consumer: IConsumer) -> Deferred: + return make_deferred_yieldable( + FileSender().beginFileTransfer(self.open_file, consumer) + ) + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + self.open_file.close() + + +class SpamMediaException(NotFoundError): + """The media was blocked by a spam checker, so we simply 404 the request (in + the same way as if it was quarantined). + """ + + +@attr.s(slots=True, auto_attribs=True) +class ReadableFileWrapper: + """Wrapper that allows reading a file in chunks, yielding to the reactor, + and writing to a callback. + + This is simplified `FileSender` that takes an IO object rather than an + `IConsumer`. + """ + + CHUNK_SIZE = 2**14 + + clock: Clock + path: str + + async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None: + """Reads the file in chunks and calls the callback with each chunk.""" + + with open(self.path, "rb") as file: + while True: + chunk = file.read(self.CHUNK_SIZE) + if not chunk: + break + + callback(chunk) + + # We yield to the reactor by sleeping for 0 seconds. + await self.clock.sleep(0) diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py new file mode 100644 index 0000000000..c0eaf04be5 --- /dev/null +++ b/synapse/media/oembed.py @@ -0,0 +1,265 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import html +import logging +import urllib.parse +from typing import TYPE_CHECKING, List, Optional + +import attr + +from synapse.media.preview_html import parse_html_description +from synapse.types import JsonDict +from synapse.util import json_decoder + +if TYPE_CHECKING: + from lxml import etree + + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class OEmbedResult: + # The Open Graph result (converted from the oEmbed result). + open_graph_result: JsonDict + # The author_name of the oEmbed result + author_name: Optional[str] + # Number of milliseconds to cache the content, according to the oEmbed response. + # + # This will be None if no cache-age is provided in the oEmbed response (or + # if the oEmbed response cannot be turned into an Open Graph response). + cache_age: Optional[int] + + +class OEmbedProvider: + """ + A helper for accessing oEmbed content. + + It can be used to check if a URL should be accessed via oEmbed and for + requesting/parsing oEmbed content. + """ + + def __init__(self, hs: "HomeServer"): + self._oembed_patterns = {} + for oembed_endpoint in hs.config.oembed.oembed_patterns: + api_endpoint = oembed_endpoint.api_endpoint + + # Only JSON is supported at the moment. This could be declared in + # the formats field. Otherwise, if the endpoint ends in .xml assume + # it doesn't support JSON. + if ( + oembed_endpoint.formats is not None + and "json" not in oembed_endpoint.formats + ) or api_endpoint.endswith(".xml"): + logger.info( + "Ignoring oEmbed endpoint due to not supporting JSON: %s", + api_endpoint, + ) + continue + + # Iterate through each URL pattern and point it to the endpoint. + for pattern in oembed_endpoint.url_patterns: + self._oembed_patterns[pattern] = api_endpoint + + def get_oembed_url(self, url: str) -> Optional[str]: + """ + Check whether the URL should be downloaded as oEmbed content instead. + + Args: + url: The URL to check. + + Returns: + A URL to use instead or None if the original URL should be used. + """ + for url_pattern, endpoint in self._oembed_patterns.items(): + if url_pattern.fullmatch(url): + # TODO Specify max height / width. + + # Note that only the JSON format is supported, some endpoints want + # this in the URL, others want it as an argument. + endpoint = endpoint.replace("{format}", "json") + + args = {"url": url, "format": "json"} + query_str = urllib.parse.urlencode(args, True) + return f"{endpoint}?{query_str}" + + # No match. + return None + + def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]: + """ + Search an HTML document for oEmbed autodiscovery information. + + Args: + tree: The parsed HTML body. + + Returns: + The URL to use for oEmbed information, or None if no URL was found. + """ + # Search for link elements with the proper rel and type attributes. + for tag in tree.xpath( + "//link[@rel='alternate'][@type='application/json+oembed']" + ): + if "href" in tag.attrib: + return tag.attrib["href"] + + # Some providers (e.g. Flickr) use alternative instead of alternate. + for tag in tree.xpath( + "//link[@rel='alternative'][@type='application/json+oembed']" + ): + if "href" in tag.attrib: + return tag.attrib["href"] + + return None + + def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: + """ + Parse the oEmbed response into an Open Graph response. + + Args: + url: The URL which is being previewed (not the one which was + requested). + raw_body: The oEmbed response as JSON encoded as bytes. + + Returns: + json-encoded Open Graph data + """ + + try: + # oEmbed responses *must* be UTF-8 according to the spec. + oembed = json_decoder.decode(raw_body.decode("utf-8")) + except ValueError: + return OEmbedResult({}, None, None) + + # The version is a required string field, but not always provided, + # or sometimes provided as a float. Be lenient. + oembed_version = oembed.get("version", "1.0") + if oembed_version != "1.0" and oembed_version != 1: + return OEmbedResult({}, None, None) + + # Attempt to parse the cache age, if possible. + try: + cache_age = int(oembed.get("cache_age")) * 1000 + except (TypeError, ValueError): + # If the cache age cannot be parsed (e.g. wrong type or invalid + # string), ignore it. + cache_age = None + + # The oEmbed response converted to Open Graph. + open_graph_response: JsonDict = {"og:url": url} + + title = oembed.get("title") + if title and isinstance(title, str): + # A common WordPress plug-in seems to incorrectly escape entities + # in the oEmbed response. + open_graph_response["og:title"] = html.unescape(title) + + author_name = oembed.get("author_name") + if not isinstance(author_name, str): + author_name = None + + # Use the provider name and as the site. + provider_name = oembed.get("provider_name") + if provider_name and isinstance(provider_name, str): + open_graph_response["og:site_name"] = provider_name + + # If a thumbnail exists, use it. Note that dimensions will be calculated later. + thumbnail_url = oembed.get("thumbnail_url") + if thumbnail_url and isinstance(thumbnail_url, str): + open_graph_response["og:image"] = thumbnail_url + + # Process each type separately. + oembed_type = oembed.get("type") + if oembed_type == "rich": + html_str = oembed.get("html") + if isinstance(html_str, str): + calc_description_and_urls(open_graph_response, html_str) + + elif oembed_type == "photo": + # If this is a photo, use the full image, not the thumbnail. + url = oembed.get("url") + if url and isinstance(url, str): + open_graph_response["og:image"] = url + + elif oembed_type == "video": + open_graph_response["og:type"] = "video.other" + html_str = oembed.get("html") + if html_str and isinstance(html_str, str): + calc_description_and_urls(open_graph_response, oembed["html"]) + for size in ("width", "height"): + val = oembed.get(size) + if type(val) is int: + open_graph_response[f"og:video:{size}"] = val + + elif oembed_type == "link": + open_graph_response["og:type"] = "website" + + else: + logger.warning("Unknown oEmbed type: %s", oembed_type) + + return OEmbedResult(open_graph_response, author_name, cache_age) + + +def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]: + results = [] + for tag in tree.xpath("//*/" + tag_name): + if "src" in tag.attrib: + results.append(tag.attrib["src"]) + return results + + +def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: + """ + Calculate description for an HTML document. + + This uses lxml to convert the HTML document into plaintext. If errors + occur during processing of the document, an empty response is returned. + + Args: + open_graph_response: The current Open Graph summary. This is updated with additional fields. + html_body: The HTML document, as bytes. + + Returns: + The summary + """ + # If there's no body, nothing useful is going to be found. + if not html_body: + return + + from lxml import etree + + # Create an HTML parser. If this fails, log and return no metadata. + parser = etree.HTMLParser(recover=True, encoding="utf-8") + + # Attempt to parse the body. If this fails, log and return no metadata. + tree = etree.fromstring(html_body, parser) + + # The data was successfully parsed, but no tree was found. + if tree is None: + return + + # Attempt to find interesting URLs (images, videos, embeds). + if "og:image" not in open_graph_response: + image_urls = _fetch_urls(tree, "img") + if image_urls: + open_graph_response["og:image"] = image_urls[0] + + video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed") + if video_urls: + open_graph_response["og:video"] = video_urls[0] + + description = parse_html_description(tree) + if description: + open_graph_response["og:description"] = description diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py new file mode 100644 index 0000000000..516d0434f0 --- /dev/null +++ b/synapse/media/preview_html.py @@ -0,0 +1,501 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import codecs +import logging +import re +from typing import ( + TYPE_CHECKING, + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Set, + Union, +) + +if TYPE_CHECKING: + from lxml import etree + +logger = logging.getLogger(__name__) + +_charset_match = re.compile( + rb'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9_-]+)"?', flags=re.I +) +_xml_encoding_match = re.compile( + rb'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9_-]+)"', flags=re.I +) +_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) + +# Certain elements aren't meant for display. +ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"} + + +def _normalise_encoding(encoding: str) -> Optional[str]: + """Use the Python codec's name as the normalised entry.""" + try: + return codecs.lookup(encoding).name + except LookupError: + return None + + +def _get_html_media_encodings( + body: bytes, content_type: Optional[str] +) -> Iterable[str]: + """ + Get potential encoding of the body based on the (presumably) HTML body or the content-type header. + + The precedence used for finding a character encoding is: + + 1. tag with a charset declared. + 2. The XML document's character encoding attribute. + 3. The Content-Type header. + 4. Fallback to utf-8. + 5. Fallback to windows-1252. + + This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector. + + Args: + body: The HTML document, as bytes. + content_type: The Content-Type header. + + Returns: + The character encoding of the body, as a string. + """ + # There's no point in returning an encoding more than once. + attempted_encodings: Set[str] = set() + + # Limit searches to the first 1kb, since it ought to be at the top. + body_start = body[:1024] + + # Check if it has an encoding set in a meta tag. + match = _charset_match.search(body_start) + if match: + encoding = _normalise_encoding(match.group(1).decode("ascii")) + if encoding: + attempted_encodings.add(encoding) + yield encoding + + # TODO Support + + # Check if it has an XML document with an encoding. + match = _xml_encoding_match.match(body_start) + if match: + encoding = _normalise_encoding(match.group(1).decode("ascii")) + if encoding and encoding not in attempted_encodings: + attempted_encodings.add(encoding) + yield encoding + + # Check the HTTP Content-Type header for a character set. + if content_type: + content_match = _content_type_match.match(content_type) + if content_match: + encoding = _normalise_encoding(content_match.group(1)) + if encoding and encoding not in attempted_encodings: + attempted_encodings.add(encoding) + yield encoding + + # Finally, fallback to UTF-8, then windows-1252. + for fallback in ("utf-8", "cp1252"): + if fallback not in attempted_encodings: + yield fallback + + +def decode_body( + body: bytes, uri: str, content_type: Optional[str] = None +) -> Optional["etree.Element"]: + """ + This uses lxml to parse the HTML document. + + Args: + body: The HTML document, as bytes. + uri: The URI used to download the body. + content_type: The Content-Type header. + + Returns: + The parsed HTML body, or None if an error occurred during processed. + """ + # If there's no body, nothing useful is going to be found. + if not body: + return None + + # The idea here is that multiple encodings are tried until one works. + # Unfortunately the result is never used and then LXML will decode the string + # again with the found encoding. + for encoding in _get_html_media_encodings(body, content_type): + try: + body.decode(encoding) + except Exception: + pass + else: + break + else: + logger.warning("Unable to decode HTML body for %s", uri) + return None + + from lxml import etree + + # Create an HTML parser. + parser = etree.HTMLParser(recover=True, encoding=encoding) + + # Attempt to parse the body. Returns None if the body was successfully + # parsed, but no tree was found. + return etree.fromstring(body, parser) + + +def _get_meta_tags( + tree: "etree.Element", + property: str, + prefix: str, + property_mapper: Optional[Callable[[str], Optional[str]]] = None, +) -> Dict[str, Optional[str]]: + """ + Search for meta tags prefixed with a particular string. + + Args: + tree: The parsed HTML document. + property: The name of the property which contains the tag name, e.g. + "property" for Open Graph. + prefix: The prefix on the property to search for, e.g. "og" for Open Graph. + property_mapper: An optional callable to map the property to the Open Graph + form. Can return None for a key to ignore that key. + + Returns: + A map of tag name to value. + """ + results: Dict[str, Optional[str]] = {} + for tag in tree.xpath( + f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]" + ): + # if we've got more than 50 tags, someone is taking the piss + if len(results) >= 50: + logger.warning( + "Skipping parsing of Open Graph for page with too many '%s:' tags", + prefix, + ) + return {} + + key = tag.attrib[property] + if property_mapper: + key = property_mapper(key) + # None is a special value used to ignore a value. + if key is None: + continue + + results[key] = tag.attrib["content"] + + return results + + +def _map_twitter_to_open_graph(key: str) -> Optional[str]: + """ + Map a Twitter card property to the analogous Open Graph property. + + Args: + key: The Twitter card property (starts with "twitter:"). + + Returns: + The Open Graph property (starts with "og:") or None to have this property + be ignored. + """ + # Twitter card properties with no analogous Open Graph property. + if key == "twitter:card" or key == "twitter:creator": + return None + if key == "twitter:site": + return "og:site_name" + # Otherwise, swap twitter to og. + return "og" + key[7:] + + +def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: + """ + Parse the HTML document into an Open Graph response. + + This uses lxml to search the HTML document for Open Graph data (or + synthesizes it from the document). + + Args: + tree: The parsed HTML document. + + Returns: + The Open Graph response as a dictionary. + """ + + # Search for Open Graph (og:) meta tags, e.g.: + # + # "og:type" : "video", + # "og:url" : "https://www.youtube.com/watch?v=LXDBoHyjmtw", + # "og:site_name" : "YouTube", + # "og:video:type" : "application/x-shockwave-flash", + # "og:description" : "Fun stuff happening here", + # "og:title" : "RemoteJam - Matrix team hack for Disrupt Europe Hackathon", + # "og:image" : "https://i.ytimg.com/vi/LXDBoHyjmtw/maxresdefault.jpg", + # "og:video:url" : "http://www.youtube.com/v/LXDBoHyjmtw?version=3&autohide=1", + # "og:video:width" : "1280" + # "og:video:height" : "720", + # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", + + og = _get_meta_tags(tree, "property", "og") + + # TODO: Search for properties specific to the different Open Graph types, + # such as article: meta tags, e.g.: + # + # "article:publisher" : "https://www.facebook.com/thethudonline" /> + # "article:author" content="https://www.facebook.com/thethudonline" /> + # "article:tag" content="baby" /> + # "article:section" content="Breaking News" /> + # "article:published_time" content="2016-03-31T19:58:24+00:00" /> + # "article:modified_time" content="2016-04-01T18:31:53+00:00" /> + + # Search for Twitter Card (twitter:) meta tags, e.g.: + # + # "twitter:site" : "@matrixdotorg" + # "twitter:creator" : "@matrixdotorg" + # + # Twitter cards tags also duplicate Open Graph tags. + # + # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started + twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph) + # Merge the Twitter values with the Open Graph values, but do not overwrite + # information from Open Graph tags. + for key, value in twitter.items(): + if key not in og: + og[key] = value + + if "og:title" not in og: + # Attempt to find a title from the title tag, or the biggest header on the page. + title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()") + if title: + og["og:title"] = title[0].strip() + else: + og["og:title"] = None + + if "og:image" not in og: + meta_image = tree.xpath( + "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]" + ) + # If a meta image is found, use it. + if meta_image: + og["og:image"] = meta_image[0] + else: + # Try to find images which are larger than 10px by 10px. + # + # TODO: consider inlined CSS styles as well as width & height attribs + images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]") + images = sorted( + images, + key=lambda i: ( + -1 * float(i.attrib["width"]) * float(i.attrib["height"]) + ), + ) + # If no images were found, try to find *any* images. + if not images: + images = tree.xpath("//img[@src][1]") + if images: + og["og:image"] = images[0].attrib["src"] + + # Finally, fallback to the favicon if nothing else. + else: + favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]") + if favicons: + og["og:image"] = favicons[0] + + if "og:description" not in og: + # Check the first meta description tag for content. + meta_description = tree.xpath( + "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]" + ) + # If a meta description is found with content, use it. + if meta_description: + og["og:description"] = meta_description[0] + else: + og["og:description"] = parse_html_description(tree) + elif og["og:description"]: + # This must be a non-empty string at this point. + assert isinstance(og["og:description"], str) + og["og:description"] = summarize_paragraphs([og["og:description"]]) + + # TODO: delete the url downloads to stop diskfilling, + # as we only ever cared about its OG + return og + + +def parse_html_description(tree: "etree.Element") -> Optional[str]: + """ + Calculate a text description based on an HTML document. + + Grabs any text nodes which are inside the tag, unless they are within + an HTML5 semantic markup tag (
,