diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7c4ae3d7ff..bf70f8373e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -486,5 +486,7 @@ jobs:
needs: ${{ toJSON(needs) }}
# The newsfile lint may be skipped on non PR builds
- skippable:
+ # Cargo test is skipped if there are no changes to Rust code
+ skippable: |
lint-newsfile
+ cargo-test
diff --git a/changelog.d/13727.doc b/changelog.d/13727.doc
new file mode 100644
index 0000000000..ba530b409d
--- /dev/null
+++ b/changelog.d/13727.doc
@@ -0,0 +1 @@
+Fix a typo in the documentation for the login ratelimiting configuration.
diff --git a/changelog.d/13745.misc b/changelog.d/13745.misc
new file mode 100644
index 0000000000..e97a789c0e
--- /dev/null
+++ b/changelog.d/13745.misc
@@ -0,0 +1 @@
+Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/changelog.d/13759.misc b/changelog.d/13759.misc
new file mode 100644
index 0000000000..f91c512483
--- /dev/null
+++ b/changelog.d/13759.misc
@@ -0,0 +1 @@
+Add a check for editable installs if the Rust library needs rebuilding.
diff --git a/changelog.d/13769.misc b/changelog.d/13769.misc
new file mode 100644
index 0000000000..2e0dd68a0f
--- /dev/null
+++ b/changelog.d/13769.misc
@@ -0,0 +1 @@
+Add a stub Rust crate.
diff --git a/changelog.d/13778.misc b/changelog.d/13778.misc
new file mode 100644
index 0000000000..2e0dd68a0f
--- /dev/null
+++ b/changelog.d/13778.misc
@@ -0,0 +1 @@
+Add a stub Rust crate.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 9045cfcb90..cdd4ebeaee 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1394,7 +1394,7 @@ This option specifies several limits for login:
client is attempting to log into. Defaults to `per_second: 0.17`,
`burst_count: 3`.
-* `failted_attempts` ratelimits login requests based on the account the
+* `failed_attempts` ratelimits login requests based on the account the
client is attempting to log into, based on the amount of failed login
attempts for this account. Defaults to `per_second: 0.17`, `burst_count: 3`.
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 0a9760cafc..deddf3cec2 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -19,3 +19,7 @@ name = "synapse.synapse_rust"
[dependencies]
pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] }
+
+[build-dependencies]
+blake2 = "0.10.4"
+hex = "0.4.3"
diff --git a/rust/build.rs b/rust/build.rs
new file mode 100644
index 0000000000..2117975e56
--- /dev/null
+++ b/rust/build.rs
@@ -0,0 +1,45 @@
+//! This build script calculates the hash of all files in the `src/`
+//! directory and adds it as an environment variable during build time.
+//!
+//! This is used so that the python code can detect when the built native module
+//! does not match the source in-tree, helping to detect the case where the
+//! source has been updated but the library hasn't been rebuilt.
+
+use std::path::PathBuf;
+
+use blake2::{Blake2b512, Digest};
+
+fn main() -> Result<(), std::io::Error> {
+    // Stack of directories still to scan, seeded with the source root.
+    let mut pending = vec![PathBuf::from("src")];
+    let mut files = Vec::new();
+
+    while let Some(dir) = pending.pop() {
+        let mut children = Vec::new();
+        for child in std::fs::read_dir(dir)? {
+            children.push(child?.path());
+        }
+        children.sort();
+
+        for child in children {
+            if !child.is_dir() {
+                files.push(child.to_str().expect("valid rust paths").to_string());
+            } else {
+                pending.push(child);
+            }
+        }
+    }
+
+    // Feed files to the hasher in sorted path order so the digest is stable.
+    files.sort();
+    let mut hasher = Blake2b512::new();
+    for file in files {
+        hasher.update(std::fs::read(file)?);
+    }
+
+    // Emitted as a compile-time env var for the crate being built.
+    let hex_digest = hex::encode(hasher.finalize());
+    println!("cargo:rustc-env=SYNAPSE_RUST_DIGEST={hex_digest}");
+
+    Ok(())
+}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 142fc2ed93..ba42465fb8 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -1,5 +1,13 @@
use pyo3::prelude::*;
+/// Returns the digest of the Rust source files captured when this library was built.
+///
+/// Python compares it with a fresh hash of the sources to detect a stale build.
+#[pyfunction]
+fn get_rust_file_digest() -> &'static str {
+    env!("SYNAPSE_RUST_DIGEST")
+}
+
/// Formats the sum of two numbers as string.
#[pyfunction]
#[pyo3(text_signature = "(a, b, /)")]
@@ -11,6 +19,6 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
#[pymodule]
fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
-
+ m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
Ok(())
}
diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi
index 5b51ba05d7..8658d3138f 100644
--- a/stubs/synapse/synapse_rust.pyi
+++ b/stubs/synapse/synapse_rust.pyi
@@ -1 +1,2 @@
def sum_as_string(a: int, b: int) -> str: ...
+def get_rust_file_digest() -> str: ...
diff --git a/synapse/__init__.py b/synapse/__init__.py
index b1369aca8f..1bed6393bd 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -20,6 +20,8 @@ import json
import os
import sys
+from synapse.util.rust import check_rust_lib_up_to_date
+
# Check that we're not running on an unsupported Python version.
if sys.version_info < (3, 7):
print("Synapse requires Python 3.7 or above.")
@@ -78,3 +80,6 @@ if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
from synapse.util.patch_inline_callbacks import do_patch
do_patch()
+
+
+check_rust_lib_up_to_date()
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 6e1ff5626b..fdb4684e12 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -32,10 +32,7 @@ import attr
from synapse.api.constants import EventTypes, Membership
from synapse.metrics import LaterGauge
-from synapse.metrics.background_process_metrics import (
- run_as_background_process,
- wrap_as_background_process,
-)
+from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import (
DatabasePool,
@@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# at a time. Keyed by room_id.
self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
- # Is the current_state_events.membership up to date? Or is the
- # background update still running?
- self._current_state_events_membership_up_to_date = False
-
- txn = db_conn.cursor(
- txn_name="_check_safe_current_state_events_membership_updated"
- )
- self._check_safe_current_state_events_membership_updated_txn(txn)
- txn.close()
-
if (
self.hs.config.worker.run_background_tasks
and self.hs.config.metrics.metrics_flags.known_servers
@@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
self._known_servers_count = max([count, 1])
return self._known_servers_count
- def _check_safe_current_state_events_membership_updated_txn(
- self, txn: LoggingTransaction
- ) -> None:
- """Checks if it is safe to assume the new current_state_events
- membership column is up to date
- """
-
- pending_update = self.db_pool.simple_select_one_txn(
- txn,
- table="background_updates",
- keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
- retcols=["update_name"],
- allow_none=True,
- )
-
- self._current_state_events_membership_up_to_date = not pending_update
-
- # If the update is still running, reschedule to run.
- if pending_update:
- self._clock.call_later(
- 15.0,
- run_as_background_process,
- "_check_safe_current_state_events_membership_updated",
- self.db_pool.runInteraction,
- "_check_safe_current_state_events_membership_updated",
- self._check_safe_current_state_events_membership_updated_txn,
- )
-
@cached(max_entries=100000, iterable=True)
async def get_users_in_room(self, room_id: str) -> List[str]:
"""
@@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
`get_current_hosts_in_room()` and so we can re-use the cache but it's
not horrible to have here either.
"""
- # If we can assume current_state_events.membership is up to date
- # then we can avoid a join, which is a Very Good Thing given how
- # frequently this function gets called.
- if self._current_state_events_membership_up_to_date:
- sql = """
- SELECT c.state_key FROM current_state_events as c
- /* Get the depth of the event from the events table */
- INNER JOIN events AS e USING (event_id)
- WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
- /* Sorted by lowest depth first */
- ORDER BY e.depth ASC;
- """
- else:
- sql = """
- SELECT c.state_key FROM room_memberships as m
- /* Get the depth of the event from the events table */
- INNER JOIN events AS e USING (event_id)
- INNER JOIN current_state_events as c
- ON m.event_id = c.event_id
- AND m.room_id = c.room_id
- AND m.user_id = c.state_key
- WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
- /* Sorted by lowest depth first */
- ORDER BY e.depth ASC;
- """
+ sql = """
+ SELECT c.state_key FROM current_state_events as c
+ /* Get the depth of the event from the events table */
+ INNER JOIN events AS e USING (event_id)
+ WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
+ /* Sorted by lowest depth first */
+ ORDER BY e.depth ASC;
+ """
txn.execute(sql, (room_id, Membership.JOIN))
return [r[0] for r in txn]
@@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats
- # If we can assume current_state_events.membership is up to date
- # then we can avoid a join, which is a Very Good Thing given how
- # frequently this function gets called.
- if self._current_state_events_membership_up_to_date:
- # Note, rejected events will have a null membership field, so
- # we we manually filter them out.
- sql = """
- SELECT count(*), membership FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ?
- AND membership IS NOT NULL
- GROUP BY membership
- """
- else:
- sql = """
- SELECT count(*), m.membership FROM room_memberships as m
- INNER JOIN current_state_events as c
- ON m.event_id = c.event_id
- AND m.room_id = c.room_id
- AND m.user_id = c.state_key
- WHERE c.type = 'm.room.member' AND c.room_id = ?
- GROUP BY m.membership
- """
+ # Note, rejected events will have a null membership field, so
+ # we manually filter them out.
+ sql = """
+ SELECT count(*), membership FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ GROUP BY membership
+ """
txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {}
@@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# we order by membership and then fairly arbitrarily by event_id so
# heroes are consistent
- if self._current_state_events_membership_up_to_date:
- # Note, rejected events will have a null membership field, so
- # we we manually filter them out.
- sql = """
- SELECT state_key, membership, event_id
- FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ?
- AND membership IS NOT NULL
- ORDER BY
- CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
- event_id ASC
- LIMIT ?
- """
- else:
- sql = """
- SELECT c.state_key, m.membership, c.event_id
- FROM room_memberships as m
- INNER JOIN current_state_events as c USING (room_id, event_id)
- WHERE c.type = 'm.room.member' AND c.room_id = ?
- ORDER BY
- CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
- c.event_id ASC
- LIMIT ?
- """
+ # Note, rejected events will have a null membership field, so
+ # we manually filter them out.
+ sql = """
+ SELECT state_key, membership, event_id
+ FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ ORDER BY
+ CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+ event_id ASC
+ LIMIT ?
+ """
# 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
@@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# We use `current_state_events` here and not `local_current_membership`
# as a) this gets called with remote users and b) this only gets called
# for rooms the server is participating in.
- if self._current_state_events_membership_up_to_date:
- sql = """
- SELECT room_id, e.instance_name, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND c.state_key = ?
- AND c.membership = ?
- """
- else:
- sql = """
- SELECT room_id, e.instance_name, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN room_memberships AS m USING (room_id, event_id)
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND c.state_key = ?
- AND m.membership = ?
- """
+ sql = """
+ SELECT room_id, e.instance_name, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND c.state_key = ?
+ AND c.membership = ?
+ """
txn.execute(sql, (user_id, Membership.JOIN))
return frozenset(
@@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
user_ids,
)
- if self._current_state_events_membership_up_to_date:
- sql = f"""
- SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND c.membership = ?
- AND {clause}
- """
- else:
- sql = f"""
- SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN room_memberships AS m USING (room_id, event_id)
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND m.membership = ?
- AND {clause}
- """
+ sql = f"""
+ SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND c.membership = ?
+ AND {clause}
+ """
txn.execute(sql, [Membership.JOIN] + args)
diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
new file mode 100644
index 0000000000..b5853d125c
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
@@ -0,0 +1,52 @@
+# Copyright 2022 Beeper
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+Forces through the `current_state_events_membership` background job so checks
+for its completion can be removed.
+
+Note the background job must still remain defined in the database class.
+"""
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    """Complete the `current_state_events_membership` update if it is pending."""
+    # Collect the names of all background updates that are still queued.
+    cur.execute("SELECT update_name FROM background_updates")
+    pending_names = [row[0] for row in cur.fetchall()]
+    update_is_pending = "current_state_events_membership" in pending_names
+    if not update_is_pending:
+        # No pending background job so nothing to do here
+        return
+
+    # Populate membership field for all current_state_events, this may take
+    # a while but was originally handled via a background update in 2019.
+    cur.execute(
+        """
+        UPDATE current_state_events
+        SET membership = (
+            SELECT membership FROM room_memberships
+            WHERE event_id = current_state_events.event_id
+        )
+        """
+    )
+
+    # Finally, delete the background job because we've handled it above
+    cur.execute(
+        """
+        DELETE FROM background_updates
+        WHERE update_name = 'current_state_events_membership'
+        """
+    )
diff --git a/synapse/util/rust.py b/synapse/util/rust.py
new file mode 100644
index 0000000000..30ecb9ffd9
--- /dev/null
+++ b/synapse/util/rust.py
@@ -0,0 +1,84 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from hashlib import blake2b
+
+import synapse
+from synapse.synapse_rust import get_rust_file_digest
+
+
+def check_rust_lib_up_to_date() -> None:
+    """For editable installs, check whether the Rust library needs rebuilding.
+
+    Raises an `Exception` if the in-tree Rust sources differ from the built module.
+    """
+    if not _dist_is_editable():
+        return
+
+    synapse_dir = os.path.dirname(synapse.__file__)
+    synapse_root = os.path.abspath(os.path.join(synapse_dir, ".."))
+
+    # Double check we've not gone into site-packages...
+    if os.path.basename(synapse_root) == "site-packages":
+        return
+
+    # ... and it looks like the root of a python project. Resolve the path
+    # against the checkout, as the working directory may be somewhere else.
+    if not os.path.exists(os.path.join(synapse_root, "pyproject.toml")):
+        return
+
+    # Hash the in-tree Rust sources and compare with the compiled-in digest.
+    digest = _hash_rust_files_in_directory(os.path.join(synapse_root, "rust", "src"))
+    if digest != get_rust_file_digest():
+        raise Exception("Rust module outdated. Please rebuild using `poetry install`")
+
+
+def _hash_rust_files_in_directory(directory: str) -> str:
+    """Return the hex BLAKE2b digest of every file under `directory`.
+
+    The tree is walked recursively and the contents of the files are fed to a
+    single hasher in sorted path order.
+    """
+    root = os.path.abspath(directory)
+
+    file_paths = []
+    pending_dirs = [root]
+    while pending_dirs:
+        with os.scandir(pending_dirs.pop()) as listing:
+            for item in listing:
+                # Directories are queued for a later scan; the rest is hashed.
+                target = pending_dirs if item.is_dir() else file_paths
+                target.append(item.path)
+
+    # A sorted order keeps the digest consistent and well-defined.
+    file_paths.sort()
+
+    digest = blake2b()
+    for file_path in file_paths:
+        # Entries are already absolute as they were scanned from `root`.
+        with open(file_path, "rb") as source:
+            digest.update(source.read())
+
+    return digest.hexdigest()
+
+
+def _dist_is_editable() -> bool:
+    """Is distribution an editable install?
+
+    True when any `sys.path` entry holds a `matrix-synapse.egg-link` file.
+    """
+    candidates = (os.path.join(p, "matrix-synapse.egg-link") for p in sys.path)
+    return any(os.path.isfile(candidate) for candidate in candidates)
|