diff options
author | Eric Eastwood <erice@element.io> | 2022-09-12 15:34:02 -0500 |
---|---|---|
committer | Eric Eastwood <erice@element.io> | 2022-09-12 15:34:02 -0500 |
commit | 6c40dfafcf32395fbc1fb4df3297c308ed003c3b (patch) | |
tree | 07a98db8f7c09f79bbf58758a0170768ed081ed7 | |
parent | poetry update (diff) | |
parent | Fix GHA skippable syntax (#13778) (diff) | |
download | synapse-6c40dfafcf32395fbc1fb4df3297c308ed003c3b.tar.xz |
Merge branch 'develop' into madlittlemods/11850-migrate-to-opentelemetry
-rw-r--r-- | .github/workflows/tests.yml | 4 | ||||
-rw-r--r-- | changelog.d/13727.doc | 1 | ||||
-rw-r--r-- | changelog.d/13745.misc | 1 | ||||
-rw-r--r-- | changelog.d/13759.misc | 1 | ||||
-rw-r--r-- | changelog.d/13769.misc | 1 | ||||
-rw-r--r-- | changelog.d/13778.misc | 1 | ||||
-rw-r--r-- | docs/usage/configuration/config_documentation.md | 2 | ||||
-rw-r--r-- | rust/Cargo.toml | 4 | ||||
-rw-r--r-- | rust/build.rs | 45 | ||||
-rw-r--r-- | rust/src/lib.rs | 10 | ||||
-rw-r--r-- | stubs/synapse/synapse_rust.pyi | 1 | ||||
-rw-r--r-- | synapse/__init__.py | 5 | ||||
-rw-r--r-- | synapse/storage/databases/main/roommember.py | 202 | ||||
-rw-r--r-- | synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py | 52 | ||||
-rw-r--r-- | synapse/util/rust.py | 84 |
15 files changed, 256 insertions, 158 deletions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7c4ae3d7ff..bf70f8373e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -486,5 +486,7 @@ jobs: needs: ${{ toJSON(needs) }} # The newsfile lint may be skipped on non PR builds - skippable: + # Cargo test is skipped if there is no changes on Rust code + skippable: | lint-newsfile + cargo-test diff --git a/changelog.d/13727.doc b/changelog.d/13727.doc new file mode 100644 index 0000000000..ba530b409d --- /dev/null +++ b/changelog.d/13727.doc @@ -0,0 +1 @@ +Fix a typo in the documentation for the login ratelimiting configuration. diff --git a/changelog.d/13745.misc b/changelog.d/13745.misc new file mode 100644 index 0000000000..e97a789c0e --- /dev/null +++ b/changelog.d/13745.misc @@ -0,0 +1 @@ +Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13759.misc b/changelog.d/13759.misc new file mode 100644 index 0000000000..f91c512483 --- /dev/null +++ b/changelog.d/13759.misc @@ -0,0 +1 @@ +Add a check for editable installs if the Rust library needs rebuilding. diff --git a/changelog.d/13769.misc b/changelog.d/13769.misc new file mode 100644 index 0000000000..2e0dd68a0f --- /dev/null +++ b/changelog.d/13769.misc @@ -0,0 +1 @@ +Add a stub Rust crate. diff --git a/changelog.d/13778.misc b/changelog.d/13778.misc new file mode 100644 index 0000000000..2e0dd68a0f --- /dev/null +++ b/changelog.d/13778.misc @@ -0,0 +1 @@ +Add a stub Rust crate. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 9045cfcb90..cdd4ebeaee 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1394,7 +1394,7 @@ This option specifies several limits for login: client is attempting to log into. Defaults to `per_second: 0.17`, `burst_count: 3`. -* `failted_attempts` ratelimits login requests based on the account the +* `failed_attempts` ratelimits login requests based on the account the client is attempting to log into, based on the amount of failed login attempts for this account. Defaults to `per_second: 0.17`, `burst_count: 3`. diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 0a9760cafc..deddf3cec2 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -19,3 +19,7 @@ name = "synapse.synapse_rust" [dependencies] pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] } + +[build-dependencies] +blake2 = "0.10.4" +hex = "0.4.3" diff --git a/rust/build.rs b/rust/build.rs new file mode 100644 index 0000000000..2117975e56 --- /dev/null +++ b/rust/build.rs @@ -0,0 +1,45 @@ +//! This build script calculates the hash of all files in the `src/` +//! directory and adds it as an environment variable during build time. +//! +//! This is used so that the python code can detect when the built native module +//! does not match the source in-tree, helping to detect the case where the +//! source has been updated but the library hasn't been rebuilt. + +use std::path::PathBuf; + +use blake2::{Blake2b512, Digest}; + +fn main() -> Result<(), std::io::Error> { + let mut dirs = vec![PathBuf::from("src")]; + + let mut paths = Vec::new(); + while let Some(path) = dirs.pop() { + let mut entries = std::fs::read_dir(path)? + .map(|res| res.map(|e| e.path())) + .collect::<Result<Vec<_>, std::io::Error>>()?; + + entries.sort(); + + for entry in entries { + if entry.is_dir() { + dirs.push(entry) + } else { + paths.push(entry.to_str().expect("valid rust paths").to_string()); + } + } + } + + paths.sort(); + + let mut hasher = Blake2b512::new(); + + for path in paths { + let bytes = std::fs::read(path)?; + hasher.update(bytes); + } + + let hex_digest = hex::encode(hasher.finalize()); + println!("cargo:rustc-env=SYNAPSE_RUST_DIGEST={hex_digest}"); + + Ok(()) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 142fc2ed93..ba42465fb8 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,13 @@ use pyo3::prelude::*; +/// Returns the hash of all the rust source files at the time it was compiled. +/// +/// Used by python to detect if the rust library is outdated. +#[pyfunction] +fn get_rust_file_digest() -> &'static str { + env!("SYNAPSE_RUST_DIGEST") +} + /// Formats the sum of two numbers as string. #[pyfunction] #[pyo3(text_signature = "(a, b, /)")] @@ -11,6 +19,6 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> { #[pymodule] fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; - + m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?; Ok(()) } diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi index 5b51ba05d7..8658d3138f 100644 --- a/stubs/synapse/synapse_rust.pyi +++ b/stubs/synapse/synapse_rust.pyi @@ -1 +1,2 @@ def sum_as_string(a: int, b: int) -> str: ... +def get_rust_file_digest() -> str: ... diff --git a/synapse/__init__.py b/synapse/__init__.py index b1369aca8f..1bed6393bd 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -20,6 +20,8 @@ import json import os import sys +from synapse.util.rust import check_rust_lib_up_to_date + # Check that we're not running on an unsupported Python version. if sys.version_info < (3, 7): print("Synapse requires Python 3.7 or above.") @@ -78,3 +80,6 @@ if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): from synapse.util.patch_inline_callbacks import do_patch do_patch() + + +check_rust_lib_up_to_date() diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 6e1ff5626b..fdb4684e12 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -32,10 +32,7 @@ import attr from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge -from synapse.metrics.background_process_metrics import ( - run_as_background_process, - wrap_as_background_process, -) +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): # at a time. Keyed by room_id. self._joined_host_linearizer = Linearizer("_JoinedHostsCache") - # Is the current_state_events.membership up to date? Or is the - # background update still running? - self._current_state_events_membership_up_to_date = False - - txn = db_conn.cursor( - txn_name="_check_safe_current_state_events_membership_updated" - ) - self._check_safe_current_state_events_membership_updated_txn(txn) - txn.close() - if ( self.hs.config.worker.run_background_tasks and self.hs.config.metrics.metrics_flags.known_servers @@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._known_servers_count = max([count, 1]) return self._known_servers_count - def _check_safe_current_state_events_membership_updated_txn( - self, txn: LoggingTransaction - ) -> None: - """Checks if it is safe to assume the new current_state_events - membership column is up to date - """ - - pending_update = self.db_pool.simple_select_one_txn( - txn, - table="background_updates", - keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME}, - retcols=["update_name"], - allow_none=True, - ) - - self._current_state_events_membership_up_to_date = not pending_update - - # If the update is still running, reschedule to run. - if pending_update: - self._clock.call_later( - 15.0, - run_as_background_process, - "_check_safe_current_state_events_membership_updated", - self.db_pool.runInteraction, - "_check_safe_current_state_events_membership_updated", - self._check_safe_current_state_events_membership_updated_txn, - ) - @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: """ @@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): `get_current_hosts_in_room()` and so we can re-use the cache but it's not horrible to have here either. """ - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT c.state_key FROM current_state_events as c - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ - else: - sql = """ - SELECT c.state_key FROM room_memberships as m - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ + sql = """ + SELECT c.state_key FROM current_state_events as c + /* Get the depth of the event from the events table */ + INNER JOIN events AS e USING (event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? + /* Sorted by lowest depth first */ + ORDER BY e.depth ASC; + """ txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] @@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We do this all in one transaction to keep the cache small. # FIXME: get rid of this when we have room_stats - # If we can assume current_state_events.membership is up to date - # then we can avoid a join, which is a Very Good Thing given how - # frequently this function gets called. - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT count(*), membership FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - GROUP BY membership - """ - else: - sql = """ - SELECT count(*), m.membership FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? - GROUP BY m.membership - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT count(*), membership FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + GROUP BY membership + """ txn.execute(sql, (room_id,)) res: Dict[str, MemberSummary] = {} @@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): # we order by membership and then fairly arbitrarily by event_id so # heroes are consistent - if self._current_state_events_membership_up_to_date: - # Note, rejected events will have a null membership field, so - # we we manually filter them out. - sql = """ - SELECT state_key, membership, event_id - FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? - AND membership IS NOT NULL - ORDER BY - CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - event_id ASC - LIMIT ? - """ - else: - sql = """ - SELECT c.state_key, m.membership, c.event_id - FROM room_memberships as m - INNER JOIN current_state_events as c USING (room_id, event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? - ORDER BY - CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - c.event_id ASC - LIMIT ? - """ + # Note, rejected events will have a null membership field, so + # we we manually filter them out. + sql = """ + SELECT state_key, membership, event_id + FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + AND membership IS NOT NULL + ORDER BY + CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + event_id ASC + LIMIT ? + """ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user. txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6)) @@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We use `current_state_events` here and not `local_current_membership` # as a) this gets called with remote users and b) this only gets called # for rooms the server is participating in. - if self._current_state_events_membership_up_to_date: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND c.membership = ? - """ - else: - sql = """ - SELECT room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.state_key = ? - AND m.membership = ? - """ + sql = """ + SELECT room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.state_key = ? + AND c.membership = ? + """ txn.execute(sql, (user_id, Membership.JOIN)) return frozenset( @@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_ids, ) - if self._current_state_events_membership_up_to_date: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.membership = ? - AND {clause} - """ - else: - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN room_memberships AS m USING (room_id, event_id) - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND m.membership = ? - AND {clause} - """ + sql = f""" + SELECT c.state_key, room_id, e.instance_name, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND c.membership = ? + AND {clause} + """ txn.execute(sql, [Membership.JOIN] + args) diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py new file mode 100644 index 0000000000..b5853d125c --- /dev/null +++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py @@ -0,0 +1,52 @@ +# Copyright 2022 Beeper +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Forces through the `current_state_events_membership` background job so checks +for its completion can be removed. + +Note the background job must still remain defined in the database class. +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + cur.execute("SELECT update_name FROM background_updates") + rows = cur.fetchall() + for row in rows: + if row[0] == "current_state_events_membership": + break + # No pending background job so nothing to do here + else: + return + + # Populate membership field for all current_state_events, this may take + # a while but was originally handled via a background update in 2019. + cur.execute( + """ + UPDATE current_state_events + SET membership = ( + SELECT membership FROM room_memberships + WHERE event_id = current_state_events.event_id + ) + """ + ) + + # Finally, delete the background job because we've handled it above + cur.execute( + """ + DELETE FROM background_updates + WHERE update_name = 'current_state_events_membership' + """ + ) diff --git a/synapse/util/rust.py b/synapse/util/rust.py new file mode 100644 index 0000000000..30ecb9ffd9 --- /dev/null +++ b/synapse/util/rust.py @@ -0,0 +1,84 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from hashlib import blake2b + +import synapse +from synapse.synapse_rust import get_rust_file_digest + + +def check_rust_lib_up_to_date() -> None: + """For editable installs check if the rust library is outdated and needs to + be rebuilt. + """ + + if not _dist_is_editable(): + return + + synapse_dir = os.path.dirname(synapse.__file__) + synapse_root = os.path.abspath(os.path.join(synapse_dir, "..")) + + # Double check we've not gone into site-packages... + if os.path.basename(synapse_root) == "site-packages": + return + + # ... and it looks like the root of a python project. + if not os.path.exists("pyproject.toml"): + return + + # Get the hash of all Rust source files + hash = _hash_rust_files_in_directory(os.path.join(synapse_root, "rust", "src")) + + if hash != get_rust_file_digest(): + raise Exception("Rust module outdated. Please rebuild using `poetry install`") + + +def _hash_rust_files_in_directory(directory: str) -> str: + """Get the hash of all files in a directory (recursively)""" + + directory = os.path.abspath(directory) + + paths = [] + + dirs = [directory] + while dirs: + dir = dirs.pop() + with os.scandir(dir) as d: + for entry in d: + if entry.is_dir(): + dirs.append(entry.path) + else: + paths.append(entry.path) + + # We sort to make sure that we get a consistent and well-defined ordering. + paths.sort() + + hasher = blake2b() + + for path in paths: + with open(os.path.join(directory, path), "rb") as f: + hasher.update(f.read()) + + return hasher.hexdigest() + + +def _dist_is_editable() -> bool: + """Is distribution an editable install?""" + for path_item in sys.path: + egg_link = os.path.join(path_item, "matrix-synapse.egg-link") + if os.path.isfile(egg_link): + return True + return False |