diff options
author | Erik Johnston <erik@matrix.org> | 2022-03-11 18:10:36 +0000 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2022-03-11 18:10:36 +0000 |
commit | 07fb52b5d514b8ac6076126cf0c19724507aa910 (patch) | |
tree | 71e14597ed8d8c043dcdf349723d3a52b076aeb2 | |
parent | Fix a bug in background updates wherein background updates are never run usin... (diff) | |
download | synapse-github/erikj/event_rs.tar.xz |
event_rs stuff github/erikj/event_rs erikj/event_rs
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | event_rs/Cargo.toml | 18 | ||||
-rw-r--r-- | event_rs/src/lib.rs | 187 | ||||
-rw-r--r-- | parse_events.py | 50 | ||||
-rw-r--r-- | synapse/events/__init__.py | 4 |
5 files changed, 261 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 3bd6b1a08c..75c8f77b79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,6 @@ book/
 # complement
 /complement-*
 /master.tar.gz
+
+# rust
+/event_rs/target*
diff --git a/event_rs/Cargo.toml b/event_rs/Cargo.toml
new file mode 100644
index 0000000000..58616a64ba
--- /dev/null
+++ b/event_rs/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "synapse_events"
+version = "0.1.0"
+edition = "2021"
+authors = ["Erik"]
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+anyhow = "1.0.56"
+base64 = "0.13.0"
+pyo3 = { version = "0.16.1", features = ["extension-module", "anyhow"] }
+pythonize = "0.16.0"
+serde = { version = "1.0.136", features = ["derive"] }
+serde_json = "1.0.79"
+sha2 = "0.10.2"
+signed-json = { git = "https://github.com/erikjohnston/rust-signed-json.git" }
diff --git a/event_rs/src/lib.rs b/event_rs/src/lib.rs
new file mode 100644
index 0000000000..c281d9b1d9
--- /dev/null
+++ b/event_rs/src/lib.rs
@@ -0,0 +1,187 @@
+use std::collections::BTreeMap;
+
+use anyhow::Context;
+use base64::URL_SAFE_NO_PAD;
+use pyo3::exceptions::PyAttributeError;
+use pyo3::prelude::*;
+use pyo3::types::PyBytes;
+use pythonize::pythonize;
+use serde::Deserialize;
+use serde_json::Value;
+use sha2::{Digest, Sha256};
+use signed_json::Signed;
+
+/*
+
+depth: DictProperty[int] = DictProperty("depth")
+    content: DictProperty[JsonDict] = DictProperty("content")
+    hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
+    origin: DictProperty[str] = DictProperty("origin")
+    origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
+    redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
+    room_id: DictProperty[str] = DictProperty("room_id")
+    sender: DictProperty[str] = DictProperty("sender")
+    # TODO state_key should be Optional[str]. This is generally asserted in Synapse
+    # by calling is_state() first (which ensures it is not None), but it is hard (not possible?)
+    # to properly annotate that calling is_state() asserts that state_key exists
+    # and is non-None. It would be better to replace such direct references with
+    # get_state_key() (and a check for None).
+    state_key: DictProperty[str] = DictProperty("state_key")
+    type: DictProperty[str] = DictProperty("type")
+    user_id: DictProperty[str] = DictProperty("sender")
+
+*/
+
+// FYI origin is not included here
+
+#[derive(Debug, Clone, Deserialize)]
+
+struct EventInner {
+    room_id: String,
+    depth: u64,
+    hashes: BTreeMap<String, String>,
+    origin_server_ts: u64,
+    redacts: Option<String>,
+    sender: String,
+    #[serde(rename = "type")]
+    event_type: String,
+    #[serde(default)]
+    state_key: Option<String>,
+
+    content: BTreeMap<String, Value>,
+}
+
+#[pyclass]
+#[derive(Debug, Clone, Deserialize)]
+struct Event {
+    #[pyo3(get)]
+    event_id: String,
+    #[serde(flatten)]
+    inner: Signed<EventInner>,
+}
+
+#[pymethods]
+impl Event {
+    #[getter]
+    fn room_id(&self) -> &str {
+        &self.inner.room_id
+    }
+
+    fn get_pdu_json(&self) -> PyResult<String> {
+        // TODO: Do all the other things `get_pdu_json` does.
+        Ok(serde_json::to_string(&self.inner).context("bah")?)
+    }
+
+    #[getter]
+    fn content(&self, py: Python) -> PyResult<PyObject> {
+        Ok(pythonize(py, &self.inner.content)?)
+    }
+
+    #[getter]
+    fn state_key(&self) -> PyResult<&str> {
+        if let Some(state_key) = &self.inner.state_key {
+            Ok(state_key)
+        } else {
+            Err(PyAttributeError::new_err("state_key"))
+        }
+    }
+}
+
+#[pyfunction]
+fn from_bytes(bytes: &PyBytes) -> PyResult<Event> {
+    let b = bytes.as_bytes();
+
+    let inner: Signed<EventInner> = serde_json::from_slice(b).context("parsing event")?;
+
+    let mut redacted: BTreeMap<String, Value> = redact(&inner).context("redacting")?;
+    redacted.remove("signatures");
+    redacted.remove("unsigned");
+    let redacted_json = serde_json::to_vec(&redacted).context("BAH")?;
+
+    let event_id = base64::encode_config(Sha256::digest(&redacted_json), URL_SAFE_NO_PAD);
+
+    let event = Event { event_id, inner };
+
+    Ok(event)
+}
+
+#[pymodule]
+fn synapse_events(_py: Python, m: &PyModule) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(from_bytes, m)?)?;
+    Ok(())
+}
+
+fn redact<E: serde::de::DeserializeOwned>(
+    event: &Signed<EventInner>,
+) -> Result<E, serde_json::Error> {
+    let etype = event.event_type.to_string();
+    let mut content = event.as_ref().content.clone();
+
+    let val = serde_json::to_value(event)?;
+
+    let allowed_keys = [
+        "event_id",
+        "sender",
+        "room_id",
+        "hashes",
+        "signatures",
+        "content",
+        "type",
+        "state_key",
+        "depth",
+        "prev_events",
+        "prev_state",
+        "auth_events",
+        "origin",
+        "origin_server_ts",
+        "membership",
+    ];
+
+    let val = match val {
+        serde_json::Value::Object(obj) => obj,
+        _ => unreachable!(), // Events always serialize to an object
+    };
+
+    let mut val: serde_json::Map<_, _> = val
+        .into_iter()
+        .filter(|(k, _)| allowed_keys.contains(&(k as &str)))
+        .collect();
+
+    let mut new_content = serde_json::Map::new();
+
+    let mut copy_content = |key: &str| {
+        if let Some(v) = content.remove(key) {
+            new_content.insert(key.to_string(), v);
+        }
+    };
+
+    match &etype[..] {
+        "m.room.member" => copy_content("membership"),
+        "m.room.create" => copy_content("creator"),
+        "m.room.join_rules" => copy_content("join_rule"),
+        "m.room.aliases" => copy_content("aliases"),
+        "m.room.history_visibility" => copy_content("history_visibility"),
+        "m.room.power_levels" => {
+            for key in &[
+                "ban",
+                "events",
+                "events_default",
+                "kick",
+                "redact",
+                "state_default",
+                "users",
+                "users_default",
+            ] {
+                copy_content(key);
+            }
+        }
+        _ => {}
+    }
+
+    val.insert(
+        "content".to_string(),
+        serde_json::Value::Object(new_content),
+    );
+
+    serde_json::from_value(serde_json::Value::Object(val))
+}
diff --git a/parse_events.py b/parse_events.py
new file mode 100644
index 0000000000..e7ba2f7c9e
--- /dev/null
+++ b/parse_events.py
@@ -0,0 +1,50 @@
+import json
+import time
+from synapse.api.room_versions import RoomVersion, RoomVersions
+
+from synapse.events import make_event_from_dict
+
+import synapse_events
+
+with open("/home/erikj/git/synapse/hq_events", "rb") as f:
+    event_json = f.readlines()
+
+start = time.time()
+
+rust_events = []
+
+for e in event_json:
+    e = e.strip()
+    e = e.replace(b"\\\\", b"\\")
+    event = synapse_events.from_bytes(e)
+    rust_events.append(event)
+
+now = time.time()
+
+print(f"Parsed rust event in {now - start:.2f} seconds")
+
+event_dicts = []
+
+start = time.time()
+
+event_dicts = []
+for e in event_json:
+    e = e.strip()
+    e = e.replace(b"\\\\", b"\\")
+    event_dicts.append(json.loads(e.strip()))
+
+now = time.time()
+
+print(f"Parsed JSON in {now - start:.2f} seconds")
+
+events = []
+
+start = time.time()
+
+for e in event_dicts:
+    event = make_event_from_dict(e, RoomVersions.V5)
+    events.append(event)
+
+now = time.time()
+
+print(f"Parsed event in {now - start:.2f} seconds")
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 9acb3c0cc4..204e24de17 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -310,7 +310,9 @@ class EventBase(metaclass=abc.ABCMeta):
     depth: DictProperty[int] = DictProperty("depth")
     content: DictProperty[JsonDict] = DictProperty("content")
     hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
-    origin: DictProperty[str] = DictProperty("origin")
+    origin: DictProperty[str] = DictProperty(
+        "origin"
+    )  # CAN WE GET RID OF THIS??!!!??!?!
     origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
     redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
     room_id: DictProperty[str] = DictProperty("room_id")