diff --git a/.gitignore b/.gitignore
index 3bd6b1a08c..75c8f77b79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,6 @@ book/
# complement
/complement-*
/master.tar.gz
+
+# rust
+/event_rs/target*
diff --git a/event_rs/Cargo.toml b/event_rs/Cargo.toml
new file mode 100644
index 0000000000..58616a64ba
--- /dev/null
+++ b/event_rs/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "synapse_events"
+version = "0.1.0"
+edition = "2021"
+authors = ["Erik"]
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+anyhow = "1.0.56"
+base64 = "0.13.0"
+pyo3 = { version = "0.16.1", features = ["extension-module", "anyhow"] }
+pythonize = "0.16.0"
+serde = { version = "1.0.136", features = ["derive"] }
+serde_json = "1.0.79"
+sha2 = "0.10.2"
+signed-json = { git = "https://github.com/erikjohnston/rust-signed-json.git" }
diff --git a/event_rs/src/lib.rs b/event_rs/src/lib.rs
new file mode 100644
index 0000000000..c281d9b1d9
--- /dev/null
+++ b/event_rs/src/lib.rs
@@ -0,0 +1,187 @@
+use std::collections::BTreeMap;
+
+use anyhow::Context;
+use base64::URL_SAFE_NO_PAD;
+use pyo3::exceptions::PyAttributeError;
+use pyo3::prelude::*;
+use pyo3::types::PyBytes;
+use pythonize::pythonize;
+use serde::Deserialize;
+use serde_json::Value;
+use sha2::{Digest, Sha256};
+use signed_json::Signed;
+
+/*
+
+Properties declared on the Python `EventBase` class (synapse/events/__init__.py):
+
+depth: DictProperty[int] = DictProperty("depth")
+ content: DictProperty[JsonDict] = DictProperty("content")
+ hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
+ origin: DictProperty[str] = DictProperty("origin")
+ origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
+ redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
+ room_id: DictProperty[str] = DictProperty("room_id")
+ sender: DictProperty[str] = DictProperty("sender")
+ # TODO state_key should be Optional[str]. This is generally asserted in Synapse
+ # by calling is_state() first (which ensures it is not None), but it is hard (not possible?)
+ # to properly annotate that calling is_state() asserts that state_key exists
+ # and is non-None. It would be better to replace such direct references with
+ # get_state_key() (and a check for None).
+ state_key: DictProperty[str] = DictProperty("state_key")
+ type: DictProperty[str] = DictProperty("type")
+ user_id: DictProperty[str] = DictProperty("sender")
+
+*/
+
+// Note: unlike the Python properties listed above, `origin` is not a field of `EventInner`.
+
+/// The fields of an event that this crate reads out of the event JSON.
+///
+/// Populated by serde from a JSON object; the attributes on individual fields
+/// describe how JSON keys map onto them.
+#[derive(Debug, Clone, Deserialize)]
+
+struct EventInner {
+    room_id: String,
+    depth: u64,
+    hashes: BTreeMap<String, String>,
+    origin_server_ts: u64,
+    redacts: Option<String>,
+    sender: String,
+    /// Read from the JSON key `type`, which is a reserved word in Rust.
+    #[serde(rename = "type")]
+    event_type: String,
+    /// Falls back to the `Option` default (`None`) when the key is absent.
+    #[serde(default)]
+    state_key: Option<String>,
+
+    /// The event's `content` object, keyed by its top-level field names.
+    content: BTreeMap<String, Value>,
+}
+
+// An event as exposed to Python: a signed event body plus its event ID.
+//
+// Plain `//` comments are used on purpose: `///` on a `#[pyclass]` item would
+// also set the Python-visible docstring.
+#[pyclass]
+#[derive(Debug, Clone, Deserialize)]
+struct Event {
+    // Readable from Python as `event_id` (getter generated by `#[pyo3(get)]`).
+    // `from_bytes` fills this in from a hash of the redacted event.
+    #[pyo3(get)]
+    event_id: String,
+    // The signed event body. Flattened, so when deserializing its fields are
+    // read from the same JSON object level as `event_id`.
+    #[serde(flatten)]
+    inner: Signed<EventInner>,
+}
+
+#[pymethods]
+impl Event {
+    /// The event's `room_id` field.
+    #[getter]
+    fn room_id(&self) -> &str {
+        &self.inner.room_id
+    }
+
+    /// Serializes the signed event to a JSON string.
+    ///
+    /// Raises a Python exception if serialization fails.
+    fn get_pdu_json(&self) -> PyResult<String> {
+        // TODO: Do all the other things `get_pdu_json` does.
+        let pdu_json =
+            serde_json::to_string(&self.inner).context("serializing event to JSON")?;
+        Ok(pdu_json)
+    }
+
+    /// The event's `content`, converted to a Python object on each access.
+    ///
+    /// Raises a Python exception if the conversion fails.
+    #[getter]
+    fn content(&self, py: Python) -> PyResult<PyObject> {
+        Ok(pythonize(py, &self.inner.content)?)
+    }
+
+    /// The event's `state_key`.
+    ///
+    /// Raises `AttributeError("state_key")` when the event has no state key.
+    #[getter]
+    fn state_key(&self) -> PyResult<&str> {
+        self.inner
+            .state_key
+            .as_deref()
+            .ok_or_else(|| PyAttributeError::new_err("state_key"))
+    }
+}
+
+/// Parses a signed event from JSON bytes and computes its event ID.
+///
+/// The event ID is `$` followed by the URL-safe, unpadded base64 encoding of
+/// the SHA-256 digest of the redacted event, serialized as JSON with the
+/// `signatures` and `unsigned` keys removed.
+///
+/// Raises a Python exception if the bytes cannot be parsed as an event, or if
+/// redacting or re-serializing the event fails.
+#[pyfunction]
+fn from_bytes(bytes: &PyBytes) -> PyResult<Event> {
+    let inner: Signed<EventInner> =
+        serde_json::from_slice(bytes.as_bytes()).context("parsing event")?;
+
+    // The hash is taken over the redacted event without `signatures`/`unsigned`.
+    let mut redacted: BTreeMap<String, Value> = redact(&inner).context("redacting")?;
+    redacted.remove("signatures");
+    redacted.remove("unsigned");
+    // NOTE(review): this assumes serde_json's output for a `BTreeMap` matches
+    // canonical JSON closely enough for hashing -- confirm.
+    let redacted_json =
+        serde_json::to_vec(&redacted).context("serializing redacted event")?;
+
+    let reference_hash = base64::encode_config(Sha256::digest(&redacted_json), URL_SAFE_NO_PAD);
+    // Matrix event IDs carry a leading `$` sigil in front of the encoded hash.
+    let event_id = format!("${}", reference_hash);
+
+    Ok(Event { event_id, inner })
+}
+
+// Module initializer for the `synapse_events` extension: registers `from_bytes`
+// as the module's only function.
+#[pymodule]
+fn synapse_events(_py: Python, m: &PyModule) -> PyResult<()> {
+    let from_bytes_fn = wrap_pyfunction!(from_bytes, m)?;
+    m.add_function(from_bytes_fn)
+}
+
+/// Builds the redacted form of `event` and deserializes it into `E`.
+///
+/// Only the allow-listed top-level keys survive. `content` is replaced by an
+/// object holding just the keys preserved for the event's type (empty for
+/// event types with no preserved keys).
+///
+/// # Errors
+///
+/// Returns the `serde_json` error if `event` cannot be serialized or the
+/// redacted object cannot be deserialized into `E`.
+///
+/// # Panics
+///
+/// Panics if `event` serializes to something other than a JSON object.
+fn redact<E: serde::de::DeserializeOwned>(
+    event: &Signed<EventInner>,
+) -> Result<E, serde_json::Error> {
+    // Top-level keys that are kept in a redacted event.
+    let allowed_keys = [
+        "event_id",
+        "sender",
+        "room_id",
+        "hashes",
+        "signatures",
+        "content",
+        "type",
+        "state_key",
+        "depth",
+        "prev_events",
+        "prev_state",
+        "auth_events",
+        "origin",
+        "origin_server_ts",
+        "membership",
+    ];
+
+    // `content` keys that are kept, chosen by event type.
+    let preserved_content_keys: &[&str] = match event.event_type.as_str() {
+        "m.room.member" => &["membership"],
+        "m.room.create" => &["creator"],
+        "m.room.join_rules" => &["join_rule"],
+        "m.room.aliases" => &["aliases"],
+        "m.room.history_visibility" => &["history_visibility"],
+        "m.room.power_levels" => &[
+            "ban",
+            "events",
+            "events_default",
+            "kick",
+            "redact",
+            "state_default",
+            "users",
+            "users_default",
+        ],
+        _ => &[],
+    };
+
+    // Clone only the preserved values rather than the whole content map.
+    let original_content = &event.as_ref().content;
+    let new_content: serde_json::Map<String, Value> = preserved_content_keys
+        .iter()
+        .filter_map(|&key| {
+            original_content
+                .get(key)
+                .map(|value| (key.to_string(), value.clone()))
+        })
+        .collect();
+
+    let fields = match serde_json::to_value(event)? {
+        Value::Object(fields) => fields,
+        _ => unreachable!(), // Events always serialize to an object
+    };
+
+    let mut redacted: serde_json::Map<String, Value> = fields
+        .into_iter()
+        .filter(|(key, _)| allowed_keys.contains(&key.as_str()))
+        .collect();
+
+    // Always overwrite `content`, even when nothing is preserved.
+    redacted.insert("content".to_string(), Value::Object(new_content));
+
+    serde_json::from_value(Value::Object(redacted))
+}
diff --git a/parse_events.py b/parse_events.py
new file mode 100644
index 0000000000..e7ba2f7c9e
--- /dev/null
+++ b/parse_events.py
@@ -0,0 +1,50 @@
+import json
+import time
+from synapse.api.room_versions import RoomVersion, RoomVersions
+
+from synapse.events import make_event_from_dict
+
+import synapse_events
+
+with open("/home/erikj/git/synapse/hq_events", "rb") as f:
+ event_json = f.readlines()
+
+start = time.time()
+
+rust_events = []
+
+for e in event_json:
+ e = e.strip()
+ e = e.replace(b"\\\\", b"\\")
+ event = synapse_events.from_bytes(e)
+ rust_events.append(event)
+
+now = time.time()
+
+print(f"Parsed rust event in {now - start:.2f} seconds")
+
+event_dicts = []
+
+start = time.time()
+
+event_dicts = []
+for e in event_json:
+ e = e.strip()
+ e = e.replace(b"\\\\", b"\\")
+ event_dicts.append(json.loads(e.strip()))
+
+now = time.time()
+
+print(f"Parsed JSON in {now - start:.2f} seconds")
+
+events = []
+
+start = time.time()
+
+for e in event_dicts:
+ event = make_event_from_dict(e, RoomVersions.V5)
+ events.append(event)
+
+now = time.time()
+
+print(f"Parsed event in {now - start:.2f} seconds")
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 9acb3c0cc4..204e24de17 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -310,7 +310,9 @@ class EventBase(metaclass=abc.ABCMeta):
depth: DictProperty[int] = DictProperty("depth")
content: DictProperty[JsonDict] = DictProperty("content")
hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
- origin: DictProperty[str] = DictProperty("origin")
+    origin: DictProperty[str] = DictProperty(
+        "origin"
+    )  # TODO: can we get rid of this property?
origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
room_id: DictProperty[str] = DictProperty("room_id")
|