From d94bcbced3840bb60a299136dfeb8ef380968b5a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Oct 2022 11:53:34 +0100 Subject: Fix pinning Rust deps in docker images (#14129) --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docker') diff --git a/docker/Dockerfile b/docker/Dockerfile index b20951d4cf..8be49681b4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -121,7 +121,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ COPY synapse /synapse/synapse/ COPY rust /synapse/rust/ # ... and what we need to `pip install`. -COPY pyproject.toml README.rst build_rust.py /synapse/ +COPY pyproject.toml README.rst build_rust.py Cargo.toml Cargo.lock /synapse/ # Repeat of earlier build argument declaration, as this is a new build stage. ARG TEST_ONLY_IGNORE_POETRY_LOCKFILE -- cgit 1.5.1 From 3f057e4c54ff1e4ef8b4f651c35d4698b389fe9b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 12 Oct 2022 10:47:02 +0100 Subject: Use minimal Rust installation in docker images and CI (#14141) --- changelog.d/14141.docker | 1 + changelog.d/14141.misc | 1 + docker/Dockerfile | 2 +- docker/Dockerfile-dhvirtualenv | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/14141.docker create mode 100644 changelog.d/14141.misc (limited to 'docker') diff --git a/changelog.d/14141.docker b/changelog.d/14141.docker new file mode 100644 index 0000000000..561806cdae --- /dev/null +++ b/changelog.d/14141.docker @@ -0,0 +1 @@ +Use the `minimal` Rust profile when building Synapse. diff --git a/changelog.d/14141.misc b/changelog.d/14141.misc new file mode 100644 index 0000000000..561806cdae --- /dev/null +++ b/changelog.d/14141.misc @@ -0,0 +1 @@ +Use the `minimal` Rust profile when building Synapse. diff --git a/docker/Dockerfile b/docker/Dockerfile index b20951d4cf..48fe95d9fd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -106,7 +106,7 @@ ENV CARGO_HOME=/cargo ENV PATH=/cargo/bin:/rust/bin:$PATH RUN mkdir /rust /cargo -RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable +RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable --profile minimal # To speed up rebuilds, install all of the dependencies before we copy over # the whole synapse project, so that this layer in the Docker cache can be diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv index ca3a259081..73165f6f85 100644 --- a/docker/Dockerfile-dhvirtualenv +++ b/docker/Dockerfile-dhvirtualenv @@ -92,7 +92,7 @@ ENV CARGO_HOME=/cargo ENV PATH=/cargo/bin:/rust/bin:$PATH RUN mkdir /rust /cargo -RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable +RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable --profile minimal COPY --from=builder /dh-virtualenv_1.2.2-1_all.deb / diff --git a/pyproject.toml b/pyproject.toml index 5e36baf40d..3dc4b66ca9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -316,7 +316,7 @@ build-backend = "poetry.core.masonry.api" skip = "cp36* *-musllinux_i686" # We need a rust compiler -before-all = "curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y" +before-all = "curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y --profile minimal" environment= { PATH = "$PATH:$HOME/.cargo/bin" } # For some reason if we don't manually clean the build directory we -- cgit 1.5.1 From c604d2c218a80f169876cf3063817e038063f7b9 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 12 Oct 2022 06:46:13 -0400 Subject: Mark /relations endpoint as usable on workers. (#14028) Co-authored-by: Eric Eastwood --- changelog.d/14028.feature | 1 + docker/complement/conf/start_for_complement.sh | 1 + docker/configure_workers_and_start.py | 27 ++++++++++++++++++++++++++ docs/workers.md | 1 + scripts-dev/complement.sh | 7 +++++-- synapse/app/generic_worker.py | 2 ++ 6 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14028.feature (limited to 'docker') diff --git a/changelog.d/14028.feature b/changelog.d/14028.feature new file mode 100644 index 0000000000..6f5663a0ef --- /dev/null +++ b/changelog.d/14028.feature @@ -0,0 +1 @@ +The `/relations` endpoint can now be used on workers. diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index cc6482f763..bb85d9fed7 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -57,6 +57,7 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then federation_reader, \ federation_sender, \ synchrotron, \ + client_reader, \ appservice, \ pusher" diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 51583dc13d..8e7f605b24 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -107,6 +107,33 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "shared_extra_conf": {}, "worker_extra_conf": "", }, + "client_reader": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client"], + "endpoint_patterns": [ + "^/_matrix/client/(api/v1|r0|v3|unstable)/publicRooms$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/joined_members$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$", + "^/_matrix/client/v1/rooms/.*/hierarchy$", + "^/_matrix/client/(v1|unstable)/rooms/.*/relations/", + "^/_matrix/client/(api/v1|r0|v3|unstable)/login$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/account/whoami$", + "^/_matrix/client/versions$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$", + "^/_matrix/client/(r0|v3|unstable)/register$", + "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$", + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event", + "^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms", + "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases", + "^/_matrix/client/(api/v1|r0|v3|unstable)/search", + ], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, "federation_reader": { "app": "synapse.app.generic_worker", "listener_resources": ["federation"], diff --git a/docs/workers.md b/docs/workers.md index 27041ea57c..e8d6cbaf8b 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -203,6 +203,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ ^/_matrix/client/v1/rooms/.*/hierarchy$ + ^/_matrix/client/(v1|unstable)/rooms/.*/relations/ ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(r0|v3|unstable)/account/3pid$ diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index eab23f18f1..a7b1e1e3a8 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -126,7 +126,7 @@ export COMPLEMENT_BASE_IMAGE=complement-synapse extra_test_args=() -test_tags="synapse_blacklist,msc2716,msc3030,msc3787" +test_tags="synapse_blacklist,msc3787" # All environment variables starting with PASS_ will be shared. # (The prefix is stripped off before reaching the container.) @@ -158,7 +158,10 @@ else # We only test faster room joins on monoliths, because they are purposefully # being developed without worker support to start with. - test_tags="$test_tags,faster_joins" + # + # The tests for importing historical messages (MSC2716) and jump to date (MSC3030) + # also only pass with monoliths, currently. + test_tags="$test_tags,faster_joins,msc2716,msc3030" fi diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 5e3825fca6..dc49840f73 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -65,6 +65,7 @@ from synapse.rest.client import ( push_rule, read_marker, receipts, + relations, room, room_batch, room_keys, @@ -308,6 +309,7 @@ class GenericWorkerServer(HomeServer): sync.register_servlets(self, resource) events.register_servlets(self, resource) room.register_servlets(self, resource, is_worker=True) + relations.register_servlets(self, resource) room.register_deprecated_servlets(self, resource) initial_sync.register_servlets(self, resource) room_batch.register_servlets(self, resource) -- cgit 1.5.1 From 29ee4b6698e3f5fd06406e194e6d88cff623fa7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Oct 2022 19:16:21 +0100 Subject: Fix docker build OOMing in CI for arm64 builds (#14173) Co-authored-by: David Robertson --- .github/workflows/docker.yml | 5 ++++- changelog.d/14173.docker | 1 + docker/Dockerfile | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14173.docker (limited to 'docker') diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index b3793e5c1f..b20048ff54 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -27,7 +27,7 @@ jobs: - name: Inspect builder run: docker buildx inspect - + - name: Log in to DockerHub uses: docker/login-action@v2 with: @@ -55,3 +55,6 @@ jobs: tags: "${{ steps.set-tag.outputs.tags }}" file: "docker/Dockerfile" platforms: linux/amd64,linux/arm64 + build-args: + # arm64 builds OOM otherwise. c.f. https://github.com/rust-lang/cargo/issues/10583 + CARGO_NET_GIT_FETCH_WITH_CLI: true diff --git a/changelog.d/14173.docker b/changelog.d/14173.docker new file mode 100644 index 0000000000..5ad113f435 --- /dev/null +++ b/changelog.d/14173.docker @@ -0,0 +1 @@ +Fix docker build OOMing in CI for arm64 builds. diff --git a/docker/Dockerfile b/docker/Dockerfile index 8be49681b4..62f0a9bf93 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -108,6 +108,12 @@ RUN mkdir /rust /cargo RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable + +# arm64 builds consume a lot of memory if `CARGO_NET_GIT_FETCH_WITH_CLI` is not +# set to true, so we expose it as a build-arg. +ARG CARGO_NET_GIT_FETCH_WITH_CLI=false +ENV CARGO_NET_GIT_FETCH_WITH_CLI=$CARGO_NET_GIT_FETCH_WITH_CLI + # To speed up rebuilds, install all of the dependencies before we copy over # the whole synapse project, so that this layer in the Docker cache can be # used while you develop on the source -- cgit 1.5.1 From c3e4edb4d6ba33383bc056e3ff22b2d034d3e248 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 14 Oct 2022 07:16:50 -0400 Subject: Stabilize the threads API. (#14175) Stabilize the threads API (MSC3856) by supporting (only) the v1 path for the endpoint. This also marks the API as safe for workers since it is a read-only API. --- changelog.d/13394.feature | 2 +- changelog.d/14175.feature | 1 + docker/configure_workers_and_start.py | 1 + docs/workers.md | 1 + synapse/config/experimental.py | 3 --- synapse/rest/client/relations.py | 9 ++----- tests/rest/client/test_relations.py | 47 +++++++++++++++++++++-------------- 7 files changed, 35 insertions(+), 29 deletions(-) create mode 100644 changelog.d/14175.feature (limited to 'docker') diff --git a/changelog.d/13394.feature b/changelog.d/13394.feature index 68de079cf3..df3ce45a76 100644 --- a/changelog.d/13394.feature +++ b/changelog.d/13394.feature @@ -1 +1 @@ -Experimental support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API. +Support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API. diff --git a/changelog.d/14175.feature b/changelog.d/14175.feature new file mode 100644 index 0000000000..df3ce45a76 --- /dev/null +++ b/changelog.d/14175.feature @@ -0,0 +1 @@ +Support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 8e7f605b24..d708237f69 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -118,6 +118,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$", "^/_matrix/client/v1/rooms/.*/hierarchy$", "^/_matrix/client/(v1|unstable)/rooms/.*/relations/", + "^/_matrix/client/v1/rooms/.*/threads$", "^/_matrix/client/(api/v1|r0|v3|unstable)/login$", "^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$", "^/_matrix/client/(api/v1|r0|v3|unstable)/account/whoami$", diff --git a/docs/workers.md b/docs/workers.md index e8d6cbaf8b..c27b3f8bd5 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -204,6 +204,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ ^/_matrix/client/v1/rooms/.*/hierarchy$ ^/_matrix/client/(v1|unstable)/rooms/.*/relations/ + ^/_matrix/client/v1/rooms/.*/threads$ ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(r0|v3|unstable)/account/3pid$ diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 1860006536..f44655516e 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -101,9 +101,6 @@ class ExperimentalConfig(Config): # MSC3848: Introduce errcodes for specific event sending failures self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False) - # MSC3856: Threads list API - self.msc3856_enabled: bool = experimental.get("msc3856_enabled", False) - # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index d1aa1947a5..9dd59196d9 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -82,11 +82,7 @@ class RelationPaginationServlet(RestServlet): class ThreadsServlet(RestServlet): - PATTERNS = ( - re.compile( - "^/_matrix/client/unstable/org.matrix.msc3856/rooms/(?P[^/]*)/threads" - ), - ) + PATTERNS = (re.compile("^/_matrix/client/v1/rooms/(?P[^/]*)/threads"),) def __init__(self, hs: "HomeServer"): super().__init__() @@ -126,5 +122,4 @@ class ThreadsServlet(RestServlet): def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: RelationPaginationServlet(hs).register(http_server) - if hs.config.experimental.msc3856_enabled: - ThreadsServlet(hs).register(http_server) + ThreadsServlet(hs).register(http_server) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index d595295e2c..f5c1070b2c 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -1710,7 +1710,15 @@ class RelationRedactionTestCase(BaseRelationsTestCase): class ThreadsTestCase(BaseRelationsTestCase): - @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) + def _get_threads(self, body: JsonDict) -> List[Tuple[str, str]]: + return [ + ( + ev["event_id"], + ev["unsigned"]["m.relations"]["m.thread"]["latest_event"]["event_id"], + ) + for ev in body["chunk"] + ] + def test_threads(self) -> None: """Create threads and ensure the ordering is due to their latest event.""" # Create 2 threads. @@ -1718,32 +1726,37 @@ class ThreadsTestCase(BaseRelationsTestCase): res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token) thread_2 = res["event_id"] - self._send_relation(RelationTypes.THREAD, "m.room.test") - self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2) + channel = self._send_relation(RelationTypes.THREAD, "m.room.test") + reply_1 = channel.json_body["event_id"] + channel = self._send_relation( + RelationTypes.THREAD, "m.room.test", parent_id=thread_2 + ) + reply_2 = channel.json_body["event_id"] # Request the threads in the room. channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + f"/_matrix/client/v1/rooms/{self.room}/threads", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] - self.assertEqual(thread_roots, [thread_2, thread_1]) + threads = self._get_threads(channel.json_body) + self.assertEqual(threads, [(thread_2, reply_2), (thread_1, reply_1)]) # Update the first thread, the ordering should swap. - self._send_relation(RelationTypes.THREAD, "m.room.test") + channel = self._send_relation(RelationTypes.THREAD, "m.room.test") + reply_3 = channel.json_body["event_id"] channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + f"/_matrix/client/v1/rooms/{self.room}/threads", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] - self.assertEqual(thread_roots, [thread_1, thread_2]) + # Tuple of (thread ID, latest event ID) for each thread. + threads = self._get_threads(channel.json_body) + self.assertEqual(threads, [(thread_1, reply_3), (thread_2, reply_2)]) - @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) def test_pagination(self) -> None: """Create threads and paginate through them.""" # Create 2 threads. @@ -1757,7 +1770,7 @@ class ThreadsTestCase(BaseRelationsTestCase): # Request the threads in the room. channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1", + f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) @@ -1771,7 +1784,7 @@ class ThreadsTestCase(BaseRelationsTestCase): channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1&from={next_batch}", + f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1&from={next_batch}", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) @@ -1780,7 +1793,6 @@ class ThreadsTestCase(BaseRelationsTestCase): self.assertNotIn("next_batch", channel.json_body, channel.json_body) - @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) def test_include(self) -> None: """Filtering threads to all or participated in should work.""" # Thread 1 has the user as the root event. @@ -1807,7 +1819,7 @@ class ThreadsTestCase(BaseRelationsTestCase): # All threads in the room. channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + f"/_matrix/client/v1/rooms/{self.room}/threads", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) @@ -1819,14 +1831,13 @@ class ThreadsTestCase(BaseRelationsTestCase): # Only participated threads. channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?include=participated", + f"/_matrix/client/v1/rooms/{self.room}/threads?include=participated", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]] self.assertEqual(thread_roots, [thread_2, thread_1], channel.json_body) - @unittest.override_config({"experimental_features": {"msc3856_enabled": True}}) def test_ignored_user(self) -> None: """Events from ignored users should be ignored.""" # Thread 1 has a reply from an ignored user. @@ -1852,7 +1863,7 @@ class ThreadsTestCase(BaseRelationsTestCase): # Only thread 1 is returned. channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads", + f"/_matrix/client/v1/rooms/{self.room}/threads", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) -- cgit 1.5.1 From c7446906bdd2e30e5e830a77c05ab9a15aece254 Mon Sep 17 00:00:00 2001 From: realtyem Date: Fri, 14 Oct 2022 07:29:49 -0500 Subject: Set LD_PRELOAD to load jemalloc in Dockerfile-workers. (#14182) --- changelog.d/14182.docker | 1 + docker/configure_workers_and_start.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14182.docker (limited to 'docker') diff --git a/changelog.d/14182.docker b/changelog.d/14182.docker new file mode 100644 index 0000000000..a0ce1bce53 --- /dev/null +++ b/changelog.d/14182.docker @@ -0,0 +1 @@ +Set LD_PRELOAD to use jemalloc memory allocator in Dockerfile-workers. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index d708237f69..4d78453418 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -39,6 +39,7 @@ # continue to work if so. import os +import platform import subprocess import sys from pathlib import Path @@ -632,14 +633,23 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None: with open(mark_filepath, "w") as f: f.write("") + # Lifted right out of start.py + jemallocpath = "/usr/lib/%s-linux-gnu/libjemalloc.so.2" % (platform.machine(),) + + if os.path.isfile(jemallocpath): + environ["LD_PRELOAD"] = jemallocpath + else: + log("Could not find %s, will not use" % (jemallocpath,)) + # Start supervisord, which will start Synapse, all of the configured worker # processes, redis, nginx etc. according to the config we created above. log("Starting supervisord") - os.execl( + os.execle( "/usr/local/bin/supervisord", "supervisord", "-c", "/etc/supervisor/supervisord.conf", + environ, ) -- cgit 1.5.1 From c75836fe77293c4d8355e454d27c8d97ba87cac5 Mon Sep 17 00:00:00 2001 From: realtyem Date: Fri, 14 Oct 2022 14:38:04 -0500 Subject: Strip whitespace from worker types in Dockerfile-workers (#14165) --- changelog.d/14165.docker | 1 + docker/configure_workers_and_start.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14165.docker (limited to 'docker') diff --git a/changelog.d/14165.docker b/changelog.d/14165.docker new file mode 100644 index 0000000000..7f4bc62520 --- /dev/null +++ b/changelog.d/14165.docker @@ -0,0 +1 @@ +Prevent a class of database sharding errors when using `Dockerfile-workers` to spawn multiple instances of the same worker. Contributed by Jason Little. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 4d78453418..60a5c10ea7 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -402,8 +402,8 @@ def generate_worker_files( # No workers, just the main process worker_types = [] else: - # Split type names by comma - worker_types = worker_types_env.split(",") + # Split type names by comma, ignoring whitespace. + worker_types = [x.strip() for x in worker_types_env.split(",")] # Create the worker configuration directory if it doesn't already exist os.makedirs("/conf/workers", exist_ok=True) @@ -422,8 +422,6 @@ def generate_worker_files( # For each worker type specified by the user, create config values for worker_type in worker_types: - worker_type = worker_type.strip() - worker_config = WORKERS_CONFIG.get(worker_type) if worker_config: worker_config = worker_config.copy() -- cgit 1.5.1 From 6fee2f49f3cbcc4337493907151c9535228434ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Oct 2022 18:21:14 +0100 Subject: Cache Rust build cache when building docker images (#14130) --- changelog.d/14129.bugfix | 1 + changelog.d/14130.misc | 1 + docker/Dockerfile | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14129.bugfix create mode 100644 changelog.d/14130.misc (limited to 'docker') diff --git a/changelog.d/14129.bugfix b/changelog.d/14129.bugfix new file mode 100644 index 0000000000..c392d07d22 --- /dev/null +++ b/changelog.d/14129.bugfix @@ -0,0 +1 @@ +Fix pinning Rust dependencies in docker images. diff --git a/changelog.d/14130.misc b/changelog.d/14130.misc new file mode 100644 index 0000000000..b801e172f0 --- /dev/null +++ b/changelog.d/14130.misc @@ -0,0 +1 @@ +Cache Rust build cache when building docker images. diff --git a/docker/Dockerfile b/docker/Dockerfile index f13d45bcd9..7f8756e8a4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -135,7 +135,9 @@ ARG TEST_ONLY_IGNORE_POETRY_LOCKFILE # Install the synapse package itself. # If we have populated requirements.txt, we don't install any dependencies # as we should already have those from the previous `pip install` step. -RUN if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \ +RUN --mount=type=cache,target=/synapse/target,sharing=locked \ + --mount=type=cache,target=${CARGO_HOME}/registry,sharing=locked \ + if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \ pip install --prefix="/install" --no-deps --no-warn-script-location /synapse[all]; \ else \ pip install --prefix="/install" --no-warn-script-location /synapse[all]; \ -- cgit 1.5.1 From 6c5082f3e08d1748d35eb709f6f5a9f30f17b8ff Mon Sep 17 00:00:00 2001 From: realtyem Date: Tue, 18 Oct 2022 06:56:20 -0500 Subject: Flush stdout/err in Dockerfile-workers before replacing the current process (#14195) Also update `subprocess.check_output` to the slightly newer `subprocess.run`. Signed-off-by: Jason Little --- changelog.d/14195.docker | 1 + docker/configure_workers_and_start.py | 20 ++++++++------------ docker/start.py | 18 +++++++++++++----- 3 files changed, 22 insertions(+), 17 deletions(-) create mode 100644 changelog.d/14195.docker (limited to 'docker') diff --git a/changelog.d/14195.docker b/changelog.d/14195.docker new file mode 100644 index 0000000000..d755a79599 --- /dev/null +++ b/changelog.d/14195.docker @@ -0,0 +1 @@ +Fix pre-startup logging being lost when using the `Dockerfile-workers` image. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 60a5c10ea7..1ea456b2f8 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -230,24 +230,19 @@ upstream {upstream_worker_type} {{ # Utility functions def log(txt: str) -> None: - """Log something to the stdout. - - Args: - txt: The text to log. - """ print(txt) def error(txt: str) -> NoReturn: - """Log something and exit with an error code. - - Args: - txt: The text to log in error. - """ - log(txt) + print(txt, file=sys.stderr) sys.exit(2) +def flush_buffers() -> None: + sys.stdout.flush() + sys.stderr.flush() + + def convert(src: str, dst: str, **template_vars: object) -> None: """Generate a file from a template @@ -328,7 +323,7 @@ def generate_base_homeserver_config() -> None: # start.py already does this for us, so just call that. # note that this script is copied in in the official, monolith dockerfile os.environ["SYNAPSE_HTTP_PORT"] = str(MAIN_PROCESS_HTTP_LISTENER_PORT) - subprocess.check_output(["/usr/local/bin/python", "/start.py", "migrate_config"]) + subprocess.run(["/usr/local/bin/python", "/start.py", "migrate_config"], check=True) def generate_worker_files( @@ -642,6 +637,7 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None: # Start supervisord, which will start Synapse, all of the configured worker # processes, redis, nginx etc. according to the config we created above. log("Starting supervisord") + flush_buffers() os.execle( "/usr/local/bin/supervisord", "supervisord", diff --git a/docker/start.py b/docker/start.py index 5a98dce551..ebcc599f04 100755 --- a/docker/start.py +++ b/docker/start.py @@ -13,14 +13,19 @@ import jinja2 # Utility functions def log(txt: str) -> None: - print(txt, file=sys.stderr) + print(txt) def error(txt: str) -> NoReturn: - log(txt) + print(txt, file=sys.stderr) sys.exit(2) +def flush_buffers() -> None: + sys.stdout.flush() + sys.stderr.flush() + + def convert(src: str, dst: str, environ: Mapping[str, object]) -> None: """Generate a file from a template @@ -131,10 +136,10 @@ def generate_config_from_template( if ownership is not None: log(f"Setting ownership on /data to {ownership}") - subprocess.check_output(["chown", "-R", ownership, "/data"]) + subprocess.run(["chown", "-R", ownership, "/data"], check=True) args = ["gosu", ownership] + args - subprocess.check_output(args) + subprocess.run(args, check=True) def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) -> None: @@ -158,7 +163,7 @@ def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) -> if ownership is not None: # make sure that synapse has perms to write to the data dir. log(f"Setting ownership on {data_dir} to {ownership}") - subprocess.check_output(["chown", ownership, data_dir]) + subprocess.run(["chown", ownership, data_dir], check=True) # create a suitable log config from our template log_config_file = "%s/%s.log.config" % (config_dir, server_name) @@ -185,6 +190,7 @@ def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) -> "--open-private-ports", ] # log("running %s" % (args, )) + flush_buffers() os.execv(sys.executable, args) @@ -267,8 +273,10 @@ running with 'migrate_config'. See the README for more details. args = [sys.executable] + args if ownership is not None: args = ["gosu", ownership] + args + flush_buffers() os.execve("/usr/sbin/gosu", args, environ) else: + flush_buffers() os.execve(sys.executable, args, environ) -- cgit 1.5.1 From e440f9674a28da0d8bfc5a42e9f6b636f947c5c4 Mon Sep 17 00:00:00 2001 From: Jonathan de Jong Date: Tue, 18 Oct 2022 15:52:23 +0200 Subject: Enable URL previews in complement homeserver config. (#14198) --- changelog.d/14198.misc | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/14198.misc (limited to 'docker') diff --git a/changelog.d/14198.misc b/changelog.d/14198.misc new file mode 100644 index 0000000000..9d56f8a2c6 --- /dev/null +++ b/changelog.d/14198.misc @@ -0,0 +1 @@ +Enable url previews when testing with complement. \ No newline at end of file diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index 9e554a865e..c651645115 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -12,6 +12,8 @@ trusted_key_servers: [] enable_registration: true enable_registration_without_verification: true bcrypt_rounds: 4 +url_preview_enabled: true +url_preview_ip_range_blacklist: [] ## Registration ## -- cgit 1.5.1 From 59ca73006c84a74f1bf4934f1c109202ff82408e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 1 Nov 2022 13:26:28 -0400 Subject: Enable testing MSC3874 in complement. (#14339) --- changelog.d/14339.misc | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 4 ++-- scripts-dev/complement.sh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/14339.misc (limited to 'docker') diff --git a/changelog.d/14339.misc b/changelog.d/14339.misc new file mode 100644 index 0000000000..3761d453a8 --- /dev/null +++ b/changelog.d/14339.misc @@ -0,0 +1 @@ +Enabling testing of [MSC3874](https://github.com/matrix-org/matrix-spec-proposals/pull/3874) (filtering of `/messages` by relation type) in complement. diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index c651645115..883a87159c 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -92,8 +92,6 @@ allow_device_name_lookup_over_federation: true ## Experimental Features ## experimental_features: - # Enable spaces support - spaces_enabled: true # Enable history backfilling support msc2716_enabled: true # server-side support for partial state in /send_join responses @@ -104,6 +102,8 @@ experimental_features: {% endif %} # Enable jump to date endpoint msc3030_enabled: true + # Filtering /messages by relation type. + msc3874_enabled: true server_notices: system_mxid_localpart: _server diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index a7b1e1e3a8..76c859694f 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -126,7 +126,7 @@ export COMPLEMENT_BASE_IMAGE=complement-synapse extra_test_args=() -test_tags="synapse_blacklist,msc3787" +test_tags="synapse_blacklist,msc3787,msc3874" # All environment variables starting with PASS_ will be shared. # (The prefix is stripped off before reaching the container.) -- cgit 1.5.1 From 69814eb2824daf846f869cb9579eb1008e61f8ad Mon Sep 17 00:00:00 2001 From: realtyem Date: Tue, 8 Nov 2022 06:34:09 -0600 Subject: Allow override for requesting specific worker types for Complement on command line. (#14324) * Expose getting SYNAPSE_WORKER_TYPES from external, allowing override of workers requested. * Add WORKER_TYPES variable option to complement.sh script that passes requested workers into start_for_complement.sh entrypoint. * Update docs to reflect this new ability. * Changelog * Don't rely on soft wrapping to format long strings Good idea dklimpel. Thanks for catching that. Co-authored-by: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> * Small nits just noticed in docs. * Fixup new line in docs. Co-authored-by: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> --- changelog.d/14324.misc | 1 + docker/complement/conf/start_for_complement.sh | 9 ++++++++- docs/development/contributing_guide.md | 6 ++++++ scripts-dev/complement.sh | 3 +++ 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14324.misc (limited to 'docker') diff --git a/changelog.d/14324.misc b/changelog.d/14324.misc new file mode 100644 index 0000000000..54d8198a8f --- /dev/null +++ b/changelog.d/14324.misc @@ -0,0 +1 @@ +Add override ability to `complement.sh` command line script to request certain types of workers. diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index bb85d9fed7..49d79745b0 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -45,7 +45,12 @@ esac if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then # Specify the workers to test with - export SYNAPSE_WORKER_TYPES="\ + # Allow overriding by explicitly setting SYNAPSE_WORKER_TYPES outside, while still + # utilizing WORKERS=1 for backwards compatibility. + # -n True if the length of string is non-zero. + # -z True if the length of string is zero. + if [[ -z "$SYNAPSE_WORKER_TYPES" ]]; then + export SYNAPSE_WORKER_TYPES="\ event_persister, \ event_persister, \ background_worker, \ @@ -61,6 +66,8 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then appservice, \ pusher" + fi + log "Workers requested: $SYNAPSE_WORKER_TYPES" # Improve startup times by using a launcher based on fork() export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1 else diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 1e52f9808c..342bc1d340 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -324,6 +324,12 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data - Passing `POSTGRES=1` as an environment variable to use the Postgres database instead. - Passing `WORKERS=1` as an environment variable to use a workerised setup instead. This option implies the use of Postgres. + - If setting `WORKERS=1`, optionally set `WORKER_TYPES=` to declare which worker + types you wish to test. A simple comma-delimited string containing the worker types + defined from the `WORKERS_CONFIG` template in + [here](https://github.com/matrix-org/synapse/blob/develop/docker/configure_workers_and_start.py#L54). + A safe example would be `WORKER_TYPES="federation_inbound, federation_sender, synchrotron"`. + See the [worker documentation](../workers.md) for additional information on workers. To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g: ```sh diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 76c859694f..803c6ce92d 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -139,6 +139,9 @@ if [[ -n "$WORKERS" ]]; then # Use workers. export PASS_SYNAPSE_COMPLEMENT_USE_WORKERS=true + # Pass through the workers defined. If none, it will be an empty string + export PASS_SYNAPSE_WORKER_TYPES="$WORKER_TYPES" + # Workers can only use Postgres as a database. export PASS_SYNAPSE_COMPLEMENT_DATABASE=postgres -- cgit 1.5.1 From d85cba1aa0a2f6dbb988b81d331b5ba9487fe1ac Mon Sep 17 00:00:00 2001 From: realtyem Date: Tue, 8 Nov 2022 07:14:00 -0600 Subject: Add all Stream Writer worker types to configure_workers_and_start.py (#14197) Co-authored-by: reivilibre --- changelog.d/14197.docker | 1 + docker/configure_workers_and_start.py | 76 +++++++++++++++++++++++++++++++---- 2 files changed, 70 insertions(+), 7 deletions(-) create mode 100644 changelog.d/14197.docker (limited to 'docker') diff --git a/changelog.d/14197.docker b/changelog.d/14197.docker new file mode 100644 index 0000000000..529ccd99c5 --- /dev/null +++ b/changelog.d/14197.docker @@ -0,0 +1 @@ +Add all Stream Writer worker types to configure_workers_and_start.py. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 1ea456b2f8..da259129d1 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -50,7 +50,12 @@ from jinja2 import Environment, FileSystemLoader MAIN_PROCESS_HTTP_LISTENER_PORT = 8080 - +# Workers with exposed endpoints needs either "client", "federation", or "media" listener_resources +# Watching /_matrix/client needs a "client" listener +# Watching /_matrix/federation needs a "federation" listener +# Watching /_matrix/media and related needs a "media" listener +# Stream Writers require "client" and "replication" listeners because they +# have to attach by instance_map to the master process and have client endpoints. WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "pusher": { "app": "synapse.app.pusher", @@ -209,6 +214,49 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { % (MAIN_PROCESS_HTTP_LISTENER_PORT,) ), }, + "account_data": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client", "replication"], + "endpoint_patterns": [ + "^/_matrix/client/(r0|v3|unstable)/.*/tags", + "^/_matrix/client/(r0|v3|unstable)/.*/account_data", + ], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, + "presence": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client", "replication"], + "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/presence/"], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, + "receipts": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client", "replication"], + "endpoint_patterns": [ + "^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt", + "^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers", + ], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, + "to_device": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client", "replication"], + "endpoint_patterns": ["^/_matrix/client/(r0|v3|unstable)/sendToDevice/"], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, + "typing": { + "app": "synapse.app.generic_worker", + "listener_resources": ["client", "replication"], + "endpoint_patterns": [ + "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/typing" + ], + "shared_extra_conf": {}, + "worker_extra_conf": "", + }, } # Templates for sections that may be inserted multiple times in config files @@ -271,7 +319,7 @@ def convert(src: str, dst: str, **template_vars: object) -> None: outfile.write(rendered) -def add_sharding_to_shared_config( +def add_worker_roles_to_shared_config( shared_config: dict, worker_type: str, worker_name: str, @@ -309,6 +357,20 @@ def add_sharding_to_shared_config( "port": worker_port, } + elif worker_type in ["account_data", "presence", "receipts", "to_device", "typing"]: + # Update the list of stream writers + # It's convienent that the name of the worker type is the same as the event stream + shared_config.setdefault("stream_writers", {}).setdefault( + worker_type, [] + ).append(worker_name) + + # Map of stream writer instance names to host/ports combos + # For now, all stream writers need http replication ports + instance_map[worker_name] = { + "host": "localhost", + "port": worker_port, + } + elif worker_type == "media_repository": # The first configured media worker will run the media background jobs shared_config.setdefault("media_instance_running_background_jobs", worker_name) @@ -441,11 +503,11 @@ def generate_worker_files( # Check if more than one instance of this worker type has been specified worker_type_total_count = worker_types.count(worker_type) - if worker_type_total_count > 1: - # Update the shared config with sharding-related options if necessary - add_sharding_to_shared_config( - shared_config, worker_type, worker_name, worker_port - ) + + # Update the shared config with sharding-related options if necessary + add_worker_roles_to_shared_config( + shared_config, worker_type, worker_name, worker_port + ) # Enable the worker in supervisord worker_descriptors.append(worker_config) -- cgit 1.5.1 From e9cbddc8e779050e0053826686cc8ac768e37813 Mon Sep 17 00:00:00 2001 From: realtyem Date: Wed, 9 Nov 2022 06:02:15 -0600 Subject: Modernize configure_workers_and_start.py bootstrapping script for Dockerfile-workers. (#14294) --- changelog.d/14294.docker | 1 + docker/configure_workers_and_start.py | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 changelog.d/14294.docker (limited to 'docker') diff --git a/changelog.d/14294.docker b/changelog.d/14294.docker new file mode 100644 index 0000000000..1489470408 --- /dev/null +++ b/changelog.d/14294.docker @@ -0,0 +1 @@ +Remove references to legacy worker types in the multi-worker Dockerfile. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index da259129d1..62b1bab297 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -20,7 +20,7 @@ # * SYNAPSE_SERVER_NAME: The desired server_name of the homeserver. # * SYNAPSE_REPORT_STATS: Whether to report stats. # * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG -# below. Leave empty for no workers, or set to '*' for all possible workers. +# below. Leave empty for no workers. # * SYNAPSE_AS_REGISTRATION_DIR: If specified, a directory in which .yaml and .yml files # will be treated as Application Service registration files. # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. @@ -58,10 +58,10 @@ MAIN_PROCESS_HTTP_LISTENER_PORT = 8080 # have to attach by instance_map to the master process and have client endpoints. WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "pusher": { - "app": "synapse.app.pusher", + "app": "synapse.app.generic_worker", "listener_resources": [], "endpoint_patterns": [], - "shared_extra_conf": {"start_pushers": False}, + "shared_extra_conf": {}, "worker_extra_conf": "", }, "user_dir": { @@ -84,7 +84,11 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_synapse/admin/v1/media/.*$", "^/_synapse/admin/v1/quarantine_media/.*$", ], - "shared_extra_conf": {"enable_media_repo": False}, + # The first configured media worker will run the media background jobs + "shared_extra_conf": { + "enable_media_repo": False, + "media_instance_running_background_jobs": "media_repository1", + }, "worker_extra_conf": "enable_media_repo: true", }, "appservice": { @@ -95,10 +99,10 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "worker_extra_conf": "", }, "federation_sender": { - "app": "synapse.app.federation_sender", + "app": "synapse.app.generic_worker", "listener_resources": [], "endpoint_patterns": [], - "shared_extra_conf": {"send_federation": False}, + "shared_extra_conf": {}, "worker_extra_conf": "", }, "synchrotron": { @@ -205,7 +209,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "worker_extra_conf": "", }, "frontend_proxy": { - "app": "synapse.app.frontend_proxy", + "app": "synapse.app.generic_worker", "listener_resources": ["client", "replication"], "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"], "shared_extra_conf": {}, @@ -326,7 +330,7 @@ def add_worker_roles_to_shared_config( worker_port: int, ) -> None: """Given a dictionary representing a config file shared across all workers, - append sharded worker information to it for the current worker_type instance. + append appropriate worker information to it for the current worker_type instance. Args: shared_config: The config dict that all worker instances share (after being converted to YAML) @@ -359,7 +363,7 @@ def add_worker_roles_to_shared_config( elif worker_type in ["account_data", "presence", "receipts", "to_device", "typing"]: # Update the list of stream writers - # It's convienent that the name of the worker type is the same as the event stream + # It's convenient that the name of the worker type is the same as the stream to write shared_config.setdefault("stream_writers", {}).setdefault( worker_type, [] ).append(worker_name) @@ -371,10 +375,6 @@ def add_worker_roles_to_shared_config( "port": worker_port, } - elif worker_type == "media_repository": - # The first configured media worker will run the media background jobs - shared_config.setdefault("media_instance_running_background_jobs", worker_name) - def generate_base_homeserver_config() -> None: """Starts Synapse and generates a basic homeserver config, which will later be @@ -483,8 +483,7 @@ def generate_worker_files( if worker_config: worker_config = worker_config.copy() else: - log(worker_type + " is an unknown worker type! It will be ignored") - continue + error(worker_type + " is an unknown worker type! Please fix!") new_worker_count = worker_type_counter.setdefault(worker_type, 0) + 1 worker_type_counter[worker_type] = new_worker_count -- cgit 1.5.1 From c15e9a0edb696990365ac5a4e5be847b5ae23921 Mon Sep 17 00:00:00 2001 From: realtyem Date: Wed, 16 Nov 2022 16:16:25 -0600 Subject: Remove need for `worker_main_http_uri` setting to use /keys/upload. (#14400) --- changelog.d/14400.misc | 1 + docker/configure_workers_and_start.py | 5 +- docs/workers.md | 7 +-- synapse/app/generic_worker.py | 103 +--------------------------------- synapse/config/workers.py | 6 ++ synapse/replication/http/devices.py | 67 ++++++++++++++++++++++ synapse/rest/client/keys.py | 68 ++++++++++++++++------ 7 files changed, 130 insertions(+), 127 deletions(-) create mode 100644 changelog.d/14400.misc (limited to 'docker') diff --git a/changelog.d/14400.misc b/changelog.d/14400.misc new file mode 100644 index 0000000000..6e025329c4 --- /dev/null +++ b/changelog.d/14400.misc @@ -0,0 +1 @@ +Remove the `worker_main_http_uri` configuration setting. This is now handled via internal replication. diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 62b1bab297..c1e1544536 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -213,10 +213,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "listener_resources": ["client", "replication"], "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"], "shared_extra_conf": {}, - "worker_extra_conf": ( - "worker_main_http_uri: http://127.0.0.1:%d" - % (MAIN_PROCESS_HTTP_LISTENER_PORT,) - ), + "worker_extra_conf": "", }, "account_data": { "app": "synapse.app.generic_worker", diff --git a/docs/workers.md b/docs/workers.md index 7ee8801161..4604650803 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -135,8 +135,8 @@ In the config file for each worker, you must specify: [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)). * If handling HTTP requests, a [`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners) option with an `http` listener. - * If handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for - the main process (`worker_main_http_uri`). + * **Synapse 1.71 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for + the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.72 and newer. For example: @@ -221,7 +221,6 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/search$ # Encryption requests - # Note that ^/_matrix/client/(r0|v3|unstable)/keys/upload/ requires `worker_main_http_uri` ^/_matrix/client/(r0|v3|unstable)/keys/query$ ^/_matrix/client/(r0|v3|unstable)/keys/changes$ ^/_matrix/client/(r0|v3|unstable)/keys/claim$ @@ -376,7 +375,7 @@ responsible for - persisting them to the DB, and finally - updating the events stream. -Because load is sharded in this way, you *must* restart all worker instances when +Because load is sharded in this way, you *must* restart all worker instances when adding or removing event persisters. An `event_persister` should not be mistaken for an `event_creator`. diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 1d9aef45c2..74909b7d4a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -14,14 +14,12 @@ # limitations under the License. import logging import sys -from typing import Dict, List, Optional, Tuple +from typing import Dict, List -from twisted.internet import address from twisted.web.resource import Resource import synapse import synapse.events -from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.api.urls import ( CLIENT_API_PREFIX, FEDERATION_PREFIX, @@ -43,8 +41,6 @@ from synapse.config.logger import setup_logging from synapse.config.server import ListenerConfig from synapse.federation.transport.server import TransportLayerServer from synapse.http.server import JsonResource, OptionsResource -from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.http.site import SynapseRequest from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource @@ -70,12 +66,12 @@ from synapse.rest.client import ( versions, voip, ) -from synapse.rest.client._base import client_patterns from synapse.rest.client.account import ThreepidRestServlet, WhoamiRestServlet from synapse.rest.client.devices import DevicesRestServlet from synapse.rest.client.keys import ( KeyChangesServlet, KeyQueryServlet, + KeyUploadServlet, OneTimeKeyServlet, ) from synapse.rest.client.register import ( @@ -132,107 +128,12 @@ from synapse.storage.databases.main.transactions import TransactionWorkerStore from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore from synapse.storage.databases.main.user_directory import UserDirectoryStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore -from synapse.types import JsonDict from synapse.util import SYNAPSE_VERSION from synapse.util.httpresourcetree import create_resource_tree logger = logging.getLogger("synapse.app.generic_worker") -class KeyUploadServlet(RestServlet): - """An implementation of the `KeyUploadServlet` that responds to read only - requests, but otherwise proxies through to the master instance. - """ - - PATTERNS = client_patterns("/keys/upload(/(?P[^/]+))?$") - - def __init__(self, hs: HomeServer): - """ - Args: - hs: server - """ - super().__init__() - self.auth = hs.get_auth() - self.store = hs.get_datastores().main - self.http_client = hs.get_simple_http_client() - self.main_uri = hs.config.worker.worker_main_http_uri - - async def on_POST( - self, request: SynapseRequest, device_id: Optional[str] - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - user_id = requester.user.to_string() - body = parse_json_object_from_request(request) - - if device_id is not None: - # passing the device_id here is deprecated; however, we allow it - # for now for compatibility with older clients. - if requester.device_id is not None and device_id != requester.device_id: - logger.warning( - "Client uploading keys for a different device " - "(logged in as %s, uploading for %s)", - requester.device_id, - device_id, - ) - else: - device_id = requester.device_id - - if device_id is None: - raise SynapseError( - 400, "To upload keys, you must pass device_id when authenticating" - ) - - if body: - # They're actually trying to upload something, proxy to main synapse. - - # Proxy headers from the original request, such as the auth headers - # (in case the access token is there) and the original IP / - # User-Agent of the request. - headers: Dict[bytes, List[bytes]] = { - header: list(request.requestHeaders.getRawHeaders(header, [])) - for header in (b"Authorization", b"User-Agent") - } - # Add the previous hop to the X-Forwarded-For header. - x_forwarded_for = list( - request.requestHeaders.getRawHeaders(b"X-Forwarded-For", []) - ) - # we use request.client here, since we want the previous hop, not the - # original client (as returned by request.getClientAddress()). - if isinstance(request.client, (address.IPv4Address, address.IPv6Address)): - previous_host = request.client.host.encode("ascii") - # If the header exists, add to the comma-separated list of the first - # instance of the header. Otherwise, generate a new header. - if x_forwarded_for: - x_forwarded_for = [x_forwarded_for[0] + b", " + previous_host] - x_forwarded_for.extend(x_forwarded_for[1:]) - else: - x_forwarded_for = [previous_host] - headers[b"X-Forwarded-For"] = x_forwarded_for - - # Replicate the original X-Forwarded-Proto header. Note that - # XForwardedForRequest overrides isSecure() to give us the original protocol - # used by the client, as opposed to the protocol used by our upstream proxy - # - which is what we want here. - headers[b"X-Forwarded-Proto"] = [ - b"https" if request.isSecure() else b"http" - ] - - try: - result = await self.http_client.post_json_get_json( - self.main_uri + request.uri.decode("ascii"), body, headers=headers - ) - except HttpResponseException as e: - raise e.to_synapse_error() from e - except RequestSendFailed as e: - raise SynapseError(502, "Failed to talk to master") from e - - return 200, result - else: - # Just interested in counts. - result = await self.store.count_e2e_one_time_keys(user_id, device_id) - return 200, {"one_time_key_counts": result} - - class GenericWorkerSlavedStore( # FIXME(#3714): We need to add UserDirectoryStore as we write directly # rather than going via the correct worker. diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 88b3168cbc..c4e2273a95 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -162,7 +162,13 @@ class WorkerConfig(Config): self.worker_name = config.get("worker_name", self.worker_app) self.instance_name = self.worker_name or "master" + # FIXME: Remove this check after a suitable amount of time. self.worker_main_http_uri = config.get("worker_main_http_uri", None) + if self.worker_main_http_uri is not None: + logger.warning( + "The config option worker_main_http_uri is unused since Synapse 1.72. " + "It can be safely removed from your configuration." + ) # This option is really only here to support `--manhole` command line # argument. diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py index 3d63645726..c21629def8 100644 --- a/synapse/replication/http/devices.py +++ b/synapse/replication/http/devices.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request from synapse.http.server import HttpServer +from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint from synapse.types import JsonDict @@ -78,5 +79,71 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint): return 200, user_devices +class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint): + """Ask master to upload keys for the user and send them out over federation to + update other servers. + + For now, only the master is permitted to handle key upload requests; + any worker can handle key query requests (since they're read-only). + + Calls to e2e_keys_handler.upload_keys_for_user(user_id, device_id, keys) on + the main process to accomplish this. + + Defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload + Request format(borrowed and expanded from KeyUploadServlet): + + POST /_synapse/replication/upload_keys_for_user + + { + "user_id": "", + "device_id": "", + "keys": { + ....this part can be found in KeyUploadServlet in rest/client/keys.py.... + } + } + + Response is equivalent to ` /_matrix/client/v3/keys/upload` found in KeyUploadServlet + + """ + + NAME = "upload_keys_for_user" + PATH_ARGS = () + CACHE = False + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + + self.e2e_keys_handler = hs.get_e2e_keys_handler() + self.store = hs.get_datastores().main + self.clock = hs.get_clock() + + @staticmethod + async def _serialize_payload( # type: ignore[override] + user_id: str, device_id: str, keys: JsonDict + ) -> JsonDict: + + return { + "user_id": user_id, + "device_id": device_id, + "keys": keys, + } + + async def _handle_request( # type: ignore[override] + self, request: Request + ) -> Tuple[int, JsonDict]: + content = parse_json_object_from_request(request) + + user_id = content["user_id"] + device_id = content["device_id"] + keys = content["keys"] + + results = await self.e2e_keys_handler.upload_keys_for_user( + user_id, device_id, keys + ) + + return 200, results + + def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ReplicationUserDevicesResyncRestServlet(hs).register(http_server) + ReplicationUploadKeysForUserRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index f653d2a3e1..ee038c7192 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -27,6 +27,7 @@ from synapse.http.servlet import ( ) from synapse.http.site import SynapseRequest from synapse.logging.opentracing import log_kv, set_tag +from synapse.replication.http.devices import ReplicationUploadKeysForUserRestServlet from synapse.rest.client._base import client_patterns, interactive_auth_handler from synapse.types import JsonDict, StreamToken from synapse.util.cancellation import cancellable @@ -43,24 +44,48 @@ class KeyUploadServlet(RestServlet): Content-Type: application/json { - "device_keys": { - "user_id": "", - "device_id": "", - "valid_until_ts": , - "algorithms": [ - "m.olm.curve25519-aes-sha2", - ] - "keys": { - ":": "", + "device_keys": { + "user_id": "", + "device_id": "", + "valid_until_ts": , + "algorithms": [ + "m.olm.curve25519-aes-sha2", + ] + "keys": { + ":": "", + }, + "signatures:" { + "" { + ":": "" + } + } + }, + "fallback_keys": { + ":": "", + "signed_:": { + "fallback": true, + "key": "", + "signatures": { + "": { + ":": "" + } + } + } + } + "one_time_keys": { + ":": "" }, - "signatures:" { - "" { - ":": "" - } } }, - "one_time_keys": { - ":": "" - }, } + + response, e.g.: + + { + "one_time_key_counts": { + "curve25519": 10, + "signed_curve25519": 20 + } + } + """ PATTERNS = client_patterns("/keys/upload(/(?P[^/]+))?$") @@ -71,6 +96,13 @@ class KeyUploadServlet(RestServlet): self.e2e_keys_handler = hs.get_e2e_keys_handler() self.device_handler = hs.get_device_handler() + if hs.config.worker.worker_app is None: + # if main process + self.key_uploader = self.e2e_keys_handler.upload_keys_for_user + else: + # then a worker + self.key_uploader = ReplicationUploadKeysForUserRestServlet.make_client(hs) + async def on_POST( self, request: SynapseRequest, device_id: Optional[str] ) -> Tuple[int, JsonDict]: @@ -109,8 +141,8 @@ class KeyUploadServlet(RestServlet): 400, "To upload keys, you must pass device_id when authenticating" ) - result = await self.e2e_keys_handler.upload_keys_for_user( - user_id, device_id, body + result = await self.key_uploader( + user_id=user_id, device_id=device_id, keys=body ) return 200, result -- cgit 1.5.1 From 8f10c8b054fc970838be9ae6f1f5aea95f166c98 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 28 Nov 2022 15:54:18 -0600 Subject: Move MSC3030 `/timestamp_to_event` endpoint to stable v1 location (#14471) Fix https://github.com/matrix-org/synapse/issues/14390 - Client API: `/_matrix/client/unstable/org.matrix.msc3030/rooms//timestamp_to_event?ts=&dir=` -> `/_matrix/client/v1/rooms//timestamp_to_event?ts=&dir=` - Federation API: `/_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/?ts=&dir=` -> `/_matrix/federation/v1/timestamp_to_event/?ts=&dir=` Complement test changes: https://github.com/matrix-org/complement/pull/559 --- changelog.d/14471.feature | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 2 -- docker/configure_workers_and_start.py | 2 ++ docs/workers.md | 2 ++ scripts-dev/complement.sh | 6 +++--- synapse/config/experimental.py | 3 --- synapse/federation/federation_client.py | 12 +++++++++++- synapse/federation/transport/client.py | 5 ++--- synapse/federation/transport/server/__init__.py | 8 -------- synapse/federation/transport/server/federation.py | 3 +-- synapse/rest/client/room.py | 10 +++------- synapse/rest/client/versions.py | 2 -- tests/rest/client/test_rooms.py | 7 +------ 13 files changed, 26 insertions(+), 37 deletions(-) create mode 100644 changelog.d/14471.feature (limited to 'docker') diff --git a/changelog.d/14471.feature b/changelog.d/14471.feature new file mode 100644 index 0000000000..a0e0c74f1a --- /dev/null +++ b/changelog.d/14471.feature @@ -0,0 +1 @@ +Move MSC3030 `/timestamp_to_event` endpoints to stable `v1` location (`/_matrix/client/v1/rooms//timestamp_to_event?ts=&dir=`, `/_matrix/federation/v1/timestamp_to_event/?ts=&dir=`). diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index 883a87159c..ca640c343b 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -100,8 +100,6 @@ experimental_features: # client-side support for partial state in /send_join responses faster_joins: true {% endif %} - # Enable jump to date endpoint - msc3030_enabled: true # Filtering /messages by relation type. msc3874_enabled: true diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index c1e1544536..58c62f2231 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -140,6 +140,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event", "^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms", "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases", + "^/_matrix/client/v1/rooms/.*/timestamp_to_event$", "^/_matrix/client/(api/v1|r0|v3|unstable)/search", ], "shared_extra_conf": {}, @@ -163,6 +164,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { "^/_matrix/federation/(v1|v2)/invite/", "^/_matrix/federation/(v1|v2)/query_auth/", "^/_matrix/federation/(v1|v2)/event_auth/", + "^/_matrix/federation/v1/timestamp_to_event/", "^/_matrix/federation/(v1|v2)/exchange_third_party_invite/", "^/_matrix/federation/(v1|v2)/user/devices/", "^/_matrix/federation/(v1|v2)/get_groups_publicised$", diff --git a/docs/workers.md b/docs/workers.md index 27e54c5846..2b65acb5ed 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -191,6 +191,7 @@ information. ^/_matrix/federation/(v1|v2)/send_leave/ ^/_matrix/federation/(v1|v2)/invite/ ^/_matrix/federation/v1/event_auth/ + ^/_matrix/federation/v1/timestamp_to_event/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/user/devices/ ^/_matrix/key/v2/query @@ -218,6 +219,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event/ ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$ + ^/_matrix/client/v1/rooms/.*/timestamp_to_event$ ^/_matrix/client/(api/v1|r0|v3|unstable)/search$ # Encryption requests diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 803c6ce92d..7744b47097 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -162,9 +162,9 @@ else # We only test faster room joins on monoliths, because they are purposefully # being developed without worker support to start with. # - # The tests for importing historical messages (MSC2716) and jump to date (MSC3030) - # also only pass with monoliths, currently. - test_tags="$test_tags,faster_joins,msc2716,msc3030" + # The tests for importing historical messages (MSC2716) also only pass with monoliths, + # currently. + test_tags="$test_tags,faster_joins,msc2716" fi diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index d4b71d1673..a503abf364 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -53,9 +53,6 @@ class ExperimentalConfig(Config): # MSC3266 (room summary api) self.msc3266_enabled: bool = experimental.get("msc3266_enabled", False) - # MSC3030 (Jump to date API endpoint) - self.msc3030_enabled: bool = experimental.get("msc3030_enabled", False) - # MSC2409 (this setting only relates to optionally sending to-device messages). # Presence, typing and read receipt EDUs are already sent to application services that # have opted in to receive them. If enabled, this adds to-device messages to that list. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index c4c0bc7315..8bccc9c60d 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -1691,9 +1691,19 @@ class FederationClient(FederationBase): # to return events on *both* sides of the timestamp to # help reconcile the gap faster. _timestamp_to_event_from_destination, + # Since this endpoint is new, we should try other servers before giving up. + # We can safely remove this in a year (remove after 2023-11-16). + failover_on_unknown_endpoint=True, ) return timestamp_to_event_response - except SynapseError: + except SynapseError as e: + logger.warn( + "timestamp_to_event(room_id=%s, timestamp=%s, direction=%s): encountered error when trying to fetch from destinations: %s", + room_id, + timestamp, + direction, + e, + ) return None async def _timestamp_to_event_from_destination( diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index a3cfc701cd..77f1f39cac 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -185,9 +185,8 @@ class TransportLayerClient: Raises: Various exceptions when the request fails """ - path = _create_path( - FEDERATION_UNSTABLE_PREFIX, - "/org.matrix.msc3030/timestamp_to_event/%s", + path = _create_v1_path( + "/timestamp_to_event/%s", room_id, ) diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py index 50623cd385..2725f53cf6 100644 --- a/synapse/federation/transport/server/__init__.py +++ b/synapse/federation/transport/server/__init__.py @@ -25,7 +25,6 @@ from synapse.federation.transport.server._base import ( from synapse.federation.transport.server.federation import ( FEDERATION_SERVLET_CLASSES, FederationAccountStatusServlet, - FederationTimestampLookupServlet, ) from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import ( @@ -291,13 +290,6 @@ def register_servlets( ) for servletclass in SERVLET_GROUPS[servlet_group]: - # Only allow the `/timestamp_to_event` servlet if msc3030 is enabled - if ( - servletclass == FederationTimestampLookupServlet - and not hs.config.experimental.msc3030_enabled - ): - continue - # Only allow the `/account_status` servlet if msc3720 is enabled if ( servletclass == FederationAccountStatusServlet diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 205fd16daa..53e77b4bb6 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -218,14 +218,13 @@ class FederationTimestampLookupServlet(BaseFederationServerServlet): `dir` can be `f` or `b` to indicate forwards and backwards in time from the given timestamp. - GET /_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/?ts=&dir= + GET /_matrix/federation/v1/timestamp_to_event/?ts=&dir= { "event_id": ... } """ PATH = "/timestamp_to_event/(?P[^/]*)/?" - PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3030" async def on_GET( self, diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 91cb791139..636cc62877 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1284,17 +1284,14 @@ class TimestampLookupRestServlet(RestServlet): `dir` can be `f` or `b` to indicate forwards and backwards in time from the given timestamp. - GET /_matrix/client/unstable/org.matrix.msc3030/rooms//timestamp_to_event?ts=&dir= + GET /_matrix/client/v1/rooms//timestamp_to_event?ts=&dir= { "event_id": ... } """ PATTERNS = ( - re.compile( - "^/_matrix/client/unstable/org.matrix.msc3030" - "/rooms/(?P[^/]*)/timestamp_to_event$" - ), + re.compile("^/_matrix/client/v1/rooms/(?P[^/]*)/timestamp_to_event$"), ) def __init__(self, hs: "HomeServer"): @@ -1421,8 +1418,7 @@ def register_servlets( RoomAliasListServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) RoomCreateRestServlet(hs).register(http_server) - if hs.config.experimental.msc3030_enabled: - TimestampLookupRestServlet(hs).register(http_server) + TimestampLookupRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. if not is_worker: diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 180a11ef88..3c0a90010b 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -101,8 +101,6 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3827.stable": True, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, - # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 - "org.matrix.msc3030": self.config.experimental.msc3030_enabled, # Adds support for thread relations, per MSC3440. "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above # Support for thread read receipts & notification counts. diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index e919e089cb..b4daace556 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -3546,11 +3546,6 @@ class TimestampLookupTestCase(unittest.HomeserverTestCase): login.register_servlets, ] - def default_config(self) -> JsonDict: - config = super().default_config() - config["experimental_features"] = {"msc3030_enabled": True} - return config - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self._storage_controllers = self.hs.get_storage_controllers() @@ -3592,7 +3587,7 @@ class TimestampLookupTestCase(unittest.HomeserverTestCase): channel = self.make_request( "GET", - f"/_matrix/client/unstable/org.matrix.msc3030/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}", + f"/_matrix/client/v1/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}", access_token=self.room_owner_tok, ) self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) -- cgit 1.5.1 From 22e91b8019ba07f1d014e106912cff33613b7629 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 7 Dec 2022 15:29:32 +0100 Subject: docker: remove useless cargo install with apt (#14636) Signed-off-by: Mathieu Velten --- changelog.d/14636.misc | 1 + docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14636.misc (limited to 'docker') diff --git a/changelog.d/14636.misc b/changelog.d/14636.misc new file mode 100644 index 0000000000..9d24f6888f --- /dev/null +++ b/changelog.d/14636.misc @@ -0,0 +1 @@ +Remove useless cargo install with apt from Dockerfile. diff --git a/docker/Dockerfile b/docker/Dockerfile index 7f8756e8a4..185d5bc3d4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -43,7 +43,7 @@ RUN \ --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ apt-get update -qq && apt-get install -yqq \ - build-essential cargo git libffi-dev libssl-dev \ + build-essential git libffi-dev libssl-dev \ && rm -rf /var/lib/apt/lists/* # We install poetry in its own build stage to avoid its dependencies conflicting with -- cgit 1.5.1 From be3a8a85e3820f1f0e713be58c366ac10c65cd28 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 7 Dec 2022 15:45:31 +0000 Subject: Add `--editable` flag to `complement.sh` which uses an editable install of Synapse for faster turn-around times whilst developing iteratively. (#14548) Co-authored-by: Mathieu Velten --- changelog.d/14548.misc | 1 + docker/Dockerfile-workers | 3 +- docker/complement/Dockerfile | 3 +- docker/editable.Dockerfile | 75 ++++++++++++++++++++++++++++++++ scripts-dev/complement.sh | 100 +++++++++++++++++++++++++++++++++++-------- 5 files changed, 162 insertions(+), 20 deletions(-) create mode 100644 changelog.d/14548.misc create mode 100644 docker/editable.Dockerfile (limited to 'docker') diff --git a/changelog.d/14548.misc b/changelog.d/14548.misc new file mode 100644 index 0000000000..416332015c --- /dev/null +++ b/changelog.d/14548.misc @@ -0,0 +1 @@ +Add `--editable` flag to `complement.sh` which uses an editable install of Synapse for faster turn-around times whilst developing iteratively. \ No newline at end of file diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 0c2d4f3047..faf7f2cef8 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -1,6 +1,7 @@ # syntax=docker/dockerfile:1 ARG SYNAPSE_VERSION=latest +ARG FROM=matrixdotorg/synapse:$SYNAPSE_VERSION # first of all, we create a base image with an nginx which we can copy into the # target image. For repeated rebuilds, this is much faster than apt installing @@ -23,7 +24,7 @@ FROM debian:bullseye-slim AS deps_base FROM redis:6-bullseye AS redis_base # now build the final image, based on the the regular Synapse docker image -FROM matrixdotorg/synapse:$SYNAPSE_VERSION +FROM $FROM # Install supervisord with pip instead of apt, to avoid installing a second # copy of python. diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile index c0935c99a8..be1aa1c55e 100644 --- a/docker/complement/Dockerfile +++ b/docker/complement/Dockerfile @@ -7,8 +7,9 @@ # https://github.com/matrix-org/synapse/blob/develop/docker/README-testing.md#testing-with-postgresql-and-single-or-multi-process-synapse ARG SYNAPSE_VERSION=latest +ARG FROM=matrixdotorg/synapse-workers:$SYNAPSE_VERSION -FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION +FROM $FROM # First of all, we copy postgres server from the official postgres image, # since for repeated rebuilds, this is much faster than apt installing # postgres each time. diff --git a/docker/editable.Dockerfile b/docker/editable.Dockerfile new file mode 100644 index 0000000000..0e8cf2e712 --- /dev/null +++ b/docker/editable.Dockerfile @@ -0,0 +1,75 @@ +# syntax=docker/dockerfile:1 +# This dockerfile builds an editable install of Synapse. +# +# Used by `complement.sh`. Not suitable for production use. + +ARG PYTHON_VERSION=3.9 + +### +### Stage 0: generate requirements.txt +### +# We hardcode the use of Debian bullseye here because this could change upstream +# and other Dockerfiles used for testing are expecting bullseye. +FROM docker.io/python:${PYTHON_VERSION}-slim-bullseye + +# Install Rust and other dependencies (stolen from normal Dockerfile) +# install the OS build deps +RUN \ + --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt-get update -qq && apt-get install -yqq \ + build-essential \ + libffi-dev \ + libjpeg-dev \ + libpq-dev \ + libssl-dev \ + libwebp-dev \ + libxml++2.6-dev \ + libxslt1-dev \ + openssl \ + zlib1g-dev \ + git \ + curl \ + gosu \ + libjpeg62-turbo \ + libpq5 \ + libwebp6 \ + xmlsec1 \ + libjemalloc2 \ + && rm -rf /var/lib/apt/lists/* +ENV RUSTUP_HOME=/rust +ENV CARGO_HOME=/cargo +ENV PATH=/cargo/bin:/rust/bin:$PATH +RUN mkdir /rust /cargo +RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable --profile minimal + + +# Make a base copy of the editable source tree, so that we have something to +# install and build now — even though it's going to be covered up by a mount +# at runtime. +COPY synapse /editable-src/synapse/ +COPY rust /editable-src/rust/ +# ... and what we need to `pip install`. +COPY pyproject.toml poetry.lock README.rst build_rust.py Cargo.toml Cargo.lock /editable-src/ + +RUN pip install poetry +RUN poetry config virtualenvs.create false +RUN cd /editable-src && poetry install --extras all + +# Make copies of useful things for inspection: +# - the Rust module (must be copied to the editable source tree before startup) +# - poetry.lock is useful for checking if dependencies have changed. +RUN cp /editable-src/synapse/synapse_rust.abi3.so /synapse_rust.abi3.so.bak +RUN cp /editable-src/poetry.lock /poetry.lock.bak + + +### Extra setup from original Dockerfile +COPY ./docker/start.py /start.py +COPY ./docker/conf /conf + +EXPOSE 8008/tcp 8009/tcp 8448/tcp + +ENTRYPOINT ["/start.py"] + +HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \ + CMD curl -fSs http://localhost:8008/health || exit 1 diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 7744b47097..8741ba3e34 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -53,6 +53,12 @@ Run the complement test suite on Synapse. Only build the Docker images. Don't actually run Complement. Conflicts with -f/--fast. + -e, --editable + Use an editable build of Synapse, rebuilding the image if necessary. + This is suitable for use in development where a fast turn-around time + is important. + Not suitable for use in CI in case the editable environment is impure. + For help on arguments to 'go test', run 'go help testflag'. EOF } @@ -73,6 +79,9 @@ while [ $# -ge 1 ]; do "--build-only") skip_complement_run=1 ;; + "-e"|"--editable") + use_editable_synapse=1 + ;; *) # unknown arg: presumably an argument to gotest. break the loop. break @@ -96,25 +105,76 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then echo "Checkout available at 'complement-${COMPLEMENT_REF}'" fi +if [ -n "$use_editable_synapse" ]; then + if [[ -e synapse/synapse_rust.abi3.so ]]; then + # In an editable install, back up the host's compiled Rust module to prevent + # inconvenience; the container will overwrite the module with its own copy. + mv -n synapse/synapse_rust.abi3.so synapse/synapse_rust.abi3.so~host + # And restore it on exit: + synapse_pkg=`realpath synapse` + trap "mv -f '$synapse_pkg/synapse_rust.abi3.so~host' '$synapse_pkg/synapse_rust.abi3.so'" EXIT + fi + + editable_mount="$(realpath .):/editable-src:z" + if docker inspect complement-synapse-editable &>/dev/null; then + # complement-synapse-editable already exists: see if we can still use it: + # - The Rust module must still be importable; it will fail to import if the Rust source has changed. + # - The Poetry lock file must be the same (otherwise we assume dependencies have changed) + + # First set up the module in the right place for an editable installation. + docker run --rm -v $editable_mount --entrypoint 'cp' complement-synapse-editable -- /synapse_rust.abi3.so.bak /editable-src/synapse/synapse_rust.abi3.so + + if (docker run --rm -v $editable_mount --entrypoint 'python' complement-synapse-editable -c 'import synapse.synapse_rust' \ + && docker run --rm -v $editable_mount --entrypoint 'diff' complement-synapse-editable --brief /editable-src/poetry.lock /poetry.lock.bak); then + skip_docker_build=1 + else + echo "Editable Synapse image is stale. Will rebuild." + unset skip_docker_build + fi + fi +fi + if [ -z "$skip_docker_build" ]; then - # Build the base Synapse image from the local checkout - echo_if_github "::group::Build Docker image: matrixdotorg/synapse" - docker build -t matrixdotorg/synapse \ - --build-arg TEST_ONLY_SKIP_DEP_HASH_VERIFICATION \ - --build-arg TEST_ONLY_IGNORE_POETRY_LOCKFILE \ - -f "docker/Dockerfile" . - echo_if_github "::endgroup::" - - # Build the workers docker image (from the base Synapse image we just built). - echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" - docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . - echo_if_github "::endgroup::" - - # Build the unified Complement image (from the worker Synapse image we just built). - echo_if_github "::group::Build Docker image: complement/Dockerfile" - docker build -t complement-synapse \ - -f "docker/complement/Dockerfile" "docker/complement" - echo_if_github "::endgroup::" + if [ -n "$use_editable_synapse" ]; then + + # Build a special image designed for use in development with editable + # installs. + docker build -t synapse-editable \ + -f "docker/editable.Dockerfile" . + + docker build -t synapse-workers-editable \ + --build-arg FROM=synapse-editable \ + -f "docker/Dockerfile-workers" . + + docker build -t complement-synapse-editable \ + --build-arg FROM=synapse-workers-editable \ + -f "docker/complement/Dockerfile" "docker/complement" + + # Prepare the Rust module + docker run --rm -v $editable_mount --entrypoint 'cp' complement-synapse-editable -- /synapse_rust.abi3.so.bak /editable-src/synapse/synapse_rust.abi3.so + + else + + # Build the base Synapse image from the local checkout + echo_if_github "::group::Build Docker image: matrixdotorg/synapse" + docker build -t matrixdotorg/synapse \ + --build-arg TEST_ONLY_SKIP_DEP_HASH_VERIFICATION \ + --build-arg TEST_ONLY_IGNORE_POETRY_LOCKFILE \ + -f "docker/Dockerfile" . + echo_if_github "::endgroup::" + + # Build the workers docker image (from the base Synapse image we just built). + echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" + docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . + echo_if_github "::endgroup::" + + # Build the unified Complement image (from the worker Synapse image we just built). + echo_if_github "::group::Build Docker image: complement/Dockerfile" + docker build -t complement-synapse \ + -f "docker/complement/Dockerfile" "docker/complement" + echo_if_github "::endgroup::" + + fi fi if [ -n "$skip_complement_run" ]; then @@ -123,6 +183,10 @@ if [ -n "$skip_complement_run" ]; then fi export COMPLEMENT_BASE_IMAGE=complement-synapse +if [ -n "$use_editable_synapse" ]; then + export COMPLEMENT_BASE_IMAGE=complement-synapse-editable + export COMPLEMENT_HOST_MOUNTS="$editable_mount" +fi extra_test_args=() -- cgit 1.5.1 From 2a3cd59dd06411a79fb7500970db1b98f0d87695 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 12 Dec 2022 13:21:17 +0100 Subject: Add optional ICU support for user search (#14464) Fixes #13655 This change uses ICU (International Components for Unicode) to improve boundary detection in user search. This change also adds a new dependency on libicu-dev and pkg-config for the Debian packages, which are available in all supported distros. --- changelog.d/14464.feature | 1 + debian/changelog | 7 +++ debian/control | 2 + docker/Dockerfile | 2 + docker/Dockerfile-dhvirtualenv | 2 + poetry.lock | 16 +++++- pyproject.toml | 7 +++ stubs/icu.pyi | 25 +++++++++ synapse/storage/databases/main/user_directory.py | 67 ++++++++++++++++++++++-- tests/storage/test_user_directory.py | 43 +++++++++++++++ 10 files changed, 166 insertions(+), 6 deletions(-) create mode 100644 changelog.d/14464.feature create mode 100644 stubs/icu.pyi (limited to 'docker') diff --git a/changelog.d/14464.feature b/changelog.d/14464.feature new file mode 100644 index 0000000000..688ea32117 --- /dev/null +++ b/changelog.d/14464.feature @@ -0,0 +1 @@ +Improve user search for international display names. diff --git a/debian/changelog b/debian/changelog index 163b7210bf..5d3c4f7d6b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +matrix-synapse-py3 (1.74.0~rc1) UNRELEASED; urgency=medium + + * New dependency on libicu-dev to provide improved results for user + search. + + -- Synapse Packaging team Tue, 06 Dec 2022 15:28:10 +0000 + matrix-synapse-py3 (1.73.0) stable; urgency=medium * New Synapse release 1.73.0. diff --git a/debian/control b/debian/control index 86f5a66d02..bc628cec08 100644 --- a/debian/control +++ b/debian/control @@ -8,6 +8,8 @@ Build-Depends: dh-virtualenv (>= 1.1), libsystemd-dev, libpq-dev, + libicu-dev, + pkg-config, lsb-release, python3-dev, python3, diff --git a/docker/Dockerfile b/docker/Dockerfile index 185d5bc3d4..7e5123210a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -97,6 +97,8 @@ RUN \ zlib1g-dev \ git \ curl \ + libicu-dev \ + pkg-config \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv index 73165f6f85..f3b5b00ce6 100644 --- a/docker/Dockerfile-dhvirtualenv +++ b/docker/Dockerfile-dhvirtualenv @@ -84,6 +84,8 @@ RUN apt-get update -qq -o Acquire::Languages=none \ python3-venv \ sqlite3 \ libpq-dev \ + libicu-dev \ + pkg-config \ xmlsec1 # Install rust and ensure it's in the PATH diff --git a/poetry.lock b/poetry.lock index cac22e2ef0..ccda8a23fb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -837,6 +837,14 @@ category = "dev" optional = false python-versions = ">=3.5" +[[package]] +name = "pyicu" +version = "2.10.2" +description = "Python extension wrapping the ICU C++ API" +category = "main" +optional = true +python-versions = "*" + [[package]] name = "pyjwt" version = "2.4.0" @@ -1622,7 +1630,7 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"] test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"] [extras] -all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler"] +all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler", "pyicu"] cache-memory = ["Pympler"] jwt = ["authlib"] matrix-synapse-ldap3 = ["matrix-synapse-ldap3"] @@ -1635,11 +1643,12 @@ sentry = ["sentry-sdk"] systemd = ["systemd-python"] test = ["parameterized", "idna"] url-preview = ["lxml"] +user-search = ["pyicu"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "8c44ceeb9df5c3ab43040400e0a6b895de49417e61293a1ba027640b34f03263" +content-hash = "f20007013f33bc35a01e412c48adc62a936030f3074e06286674c5ad7f44d300" [metadata.files] attrs = [ @@ -2427,6 +2436,9 @@ pygments = [ {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"}, {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"}, ] +pyicu = [ + {file = "PyICU-2.10.2.tar.gz", hash = "sha256:0c3309eea7fab6857507ace62403515b60fe096cbfb4f90d14f55ff75c5441c1"}, +] pyjwt = [ {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"}, {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"}, diff --git a/pyproject.toml b/pyproject.toml index df59fa0562..bb383683cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -208,6 +208,7 @@ hiredis = { version = "*", optional = true } Pympler = { version = "*", optional = true } parameterized = { version = ">=0.7.4", optional = true } idna = { version = ">=2.5", optional = true } +pyicu = { version = ">=2.10.2", optional = true } [tool.poetry.extras] # NB: Packages that should be part of `pip install matrix-synapse[all]` need to be specified @@ -230,6 +231,10 @@ redis = ["txredisapi", "hiredis"] # Required to use experimental `caches.track_memory_usage` config option. cache-memory = ["pympler"] test = ["parameterized", "idna"] +# Allows for better search for international characters in the user directory. This +# requires libicu's development headers installed on the system (e.g. libicu-dev on +# Debian-based distributions). +user-search = ["pyicu"] # The duplication here is awful. I hate hate hate hate hate it. However, for now I want # to ensure you can still `pip install matrix-synapse[all]` like today. Two motivations: @@ -261,6 +266,8 @@ all = [ "txredisapi", "hiredis", # cache-memory "pympler", + # improved user search + "pyicu", # omitted: # - test: it's useful to have this separate from dev deps in the olddeps job # - systemd: this is a system-based requirement diff --git a/stubs/icu.pyi b/stubs/icu.pyi new file mode 100644 index 0000000000..efeda7938a --- /dev/null +++ b/stubs/icu.pyi @@ -0,0 +1,25 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Stub for PyICU. + +class Locale: + @staticmethod + def getDefault() -> Locale: ... + +class BreakIterator: + @staticmethod + def createWordInstance(locale: Locale) -> BreakIterator: ... + def setText(self, text: str) -> None: ... + def nextBoundary(self) -> int: ... diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index af9952f513..14ef5b040d 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -26,6 +26,14 @@ from typing import ( cast, ) +try: + # Figure out if ICU support is available for searching users. + import icu + + USE_ICU = True +except ModuleNotFoundError: + USE_ICU = False + from typing_extensions import TypedDict from synapse.api.errors import StoreError @@ -900,7 +908,7 @@ def _parse_query_sqlite(search_term: str) -> str: """ # Pull out the individual words, discarding any non-word characters. - results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) + results = _parse_words(search_term) return " & ".join("(%s* OR %s)" % (result, result) for result in results) @@ -910,12 +918,63 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]: We use this so that we can add prefix matching, which isn't something that is supported by default. """ - - # Pull out the individual words, discarding any non-word characters. - results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) + results = _parse_words(search_term) both = " & ".join("(%s:* | %s)" % (result, result) for result in results) exact = " & ".join("%s" % (result,) for result in results) prefix = " & ".join("%s:*" % (result,) for result in results) return both, exact, prefix + + +def _parse_words(search_term: str) -> List[str]: + """Split the provided search string into a list of its words. + + If support for ICU (International Components for Unicode) is available, use it. + Otherwise, fall back to using a regex to detect word boundaries. This latter + solution works well enough for most latin-based languages, but doesn't work as well + with other languages. + + Args: + search_term: The search string. + + Returns: + A list of the words in the search string. + """ + if USE_ICU: + return _parse_words_with_icu(search_term) + + return re.findall(r"([\w\-]+)", search_term, re.UNICODE) + + +def _parse_words_with_icu(search_term: str) -> List[str]: + """Break down the provided search string into its individual words using ICU + (International Components for Unicode). + + Args: + search_term: The search string. + + Returns: + A list of the words in the search string. + """ + results = [] + breaker = icu.BreakIterator.createWordInstance(icu.Locale.getDefault()) + breaker.setText(search_term) + i = 0 + while True: + j = breaker.nextBoundary() + if j < 0: + break + + result = search_term[i:j] + + # libicu considers spaces and punctuation between words as words, but we don't + # want to include those in results as they would result in syntax errors in SQL + # queries (e.g. "foo bar" would result in the search query including "foo & & + # bar"). + if len(re.findall(r"([\w\-]+)", result, re.UNICODE)): + results.append(result) + + i = j + + return results diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 88c7d5fec0..3ba896ecf3 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re from typing import Any, Dict, Set, Tuple from unittest import mock from unittest.mock import Mock, patch @@ -30,6 +31,12 @@ from synapse.util import Clock from tests.test_utils.event_injection import inject_member_event from tests.unittest import HomeserverTestCase, override_config +try: + import icu +except ImportError: + icu = None # type: ignore + + ALICE = "@alice:a" BOB = "@bob:b" BOBBY = "@bobby:a" @@ -467,3 +474,39 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): r["results"][0], {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + + +class UserDirectoryICUTestCase(HomeserverTestCase): + if not icu: + skip = "Requires PyICU" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.user_dir_helper = GetUserDirectoryTables(self.store) + + def test_icu_word_boundary(self) -> None: + """Tests that we correctly detect word boundaries when ICU (International + Components for Unicode) support is available. + """ + + display_name = "Gáo" + + # This word is not broken down correctly by Python's regular expressions, + # likely because á is actually a lowercase a followed by a U+0301 combining + # acute accent. This is specifically something that ICU support fixes. + matches = re.findall(r"([\w\-]+)", display_name, re.UNICODE) + self.assertEqual(len(matches), 2) + + self.get_success( + self.store.update_profile_in_user_dir(ALICE, display_name, None) + ) + self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE,))) + + # Check that searching for this user yields the correct result. + r = self.get_success(self.store.search_user_dir(BOB, display_name, 10)) + self.assertFalse(r["limited"]) + self.assertEqual(len(r["results"]), 1) + self.assertDictEqual( + r["results"][0], + {"user_id": ALICE, "display_name": display_name, "avatar_url": None}, + ) -- cgit 1.5.1