summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2022-03-11 18:10:36 +0000
committer Erik Johnston <erik@matrix.org> 2022-03-11 18:10:36 +0000
commit 07fb52b5d514b8ac6076126cf0c19724507aa910 (patch)
tree 71e14597ed8d8c043dcdf349723d3a52b076aeb2
parent Fix a bug in background updates wherein background updates are never run usin... (diff)
download synapse-github/erikj/event_rs.tar.xz
-rw-r--r-- .gitignore 3
-rw-r--r-- event_rs/Cargo.toml 18
-rw-r--r-- event_rs/src/lib.rs 187
-rw-r--r-- parse_events.py 50
-rw-r--r-- synapse/events/__init__.py 4
5 files changed, 261 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 3bd6b1a08c..75c8f77b79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,6 @@ book/
 # complement
 /complement-*
 /master.tar.gz
+
+# rust
+/event_rs/target*
diff --git a/event_rs/Cargo.toml b/event_rs/Cargo.toml
new file mode 100644
index 0000000000..58616a64ba
--- /dev/null
+++ b/event_rs/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "synapse_events"
+version = "0.1.0"
+edition = "2021"
+authors = ["Erik"]
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+anyhow = "1.0.56"
+base64 = "0.13.0"
+pyo3 = { version = "0.16.1", features = ["extension-module", "anyhow"] }
+pythonize = "0.16.0"
+serde = { version = "1.0.136", features = ["derive"] }
+serde_json = "1.0.79"
+sha2 = "0.10.2"
+signed-json = { git = "https://github.com/erikjohnston/rust-signed-json.git" }
diff --git a/event_rs/src/lib.rs b/event_rs/src/lib.rs
new file mode 100644
index 0000000000..c281d9b1d9
--- /dev/null
+++ b/event_rs/src/lib.rs
@@ -0,0 +1,187 @@
+use std::collections::BTreeMap;
+
+use anyhow::Context;
+use base64::URL_SAFE_NO_PAD;
+use pyo3::exceptions::PyAttributeError;
+use pyo3::prelude::*;
+use pyo3::types::PyBytes;
+use pythonize::pythonize;
+use serde::Deserialize;
+use serde_json::Value;
+use sha2::{Digest, Sha256};
+use signed_json::Signed;
+
+/*
+Properties defined on Synapse's Python `EventBase` class (copied here for reference):
+    depth: DictProperty[int] = DictProperty("depth")
+    content: DictProperty[JsonDict] = DictProperty("content")
+    hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
+    origin: DictProperty[str] = DictProperty("origin")
+    origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
+    redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
+    room_id: DictProperty[str] = DictProperty("room_id")
+    sender: DictProperty[str] = DictProperty("sender")
+    # TODO state_key should be Optional[str]. This is generally asserted in Synapse
+    # by calling is_state() first (which ensures it is not None), but it is hard (not possible?)
+    # to properly annotate that calling is_state() asserts that state_key exists
+    # and is non-None. It would be better to replace such direct references with
+    # get_state_key() (and a check for None).
+    state_key: DictProperty[str] = DictProperty("state_key")
+    type: DictProperty[str] = DictProperty("type")
+    user_id: DictProperty[str] = DictProperty("sender")
+
+*/
+
+// NOTE: `origin` from the list above is not included in the struct below.
+
+#[derive(Debug, Clone, Deserialize)]
+// Typed view of the event fields this crate reads; `from_bytes` parses it from JSON.
+struct EventInner {
+    room_id: String,
+    depth: u64,
+    hashes: BTreeMap<String, String>,
+    origin_server_ts: u64,
+    redacts: Option<String>,
+    sender: String,
+    #[serde(rename = "type")] // JSON key is `type`, which is a Rust keyword.
+    event_type: String,
+    #[serde(default)]
+    state_key: Option<String>,
+    // Arbitrary JSON value per key; `redact` keeps only selected keys of this map.
+    content: BTreeMap<String, Value>,
+}
+
+#[pyclass] // Exposed to Python as a class; methods are in the `#[pymethods]` impl.
+#[derive(Debug, Clone, Deserialize)]
+struct Event {
+    #[pyo3(get)] // Readable from Python as the `event_id` attribute.
+    event_id: String,
+    #[serde(flatten)] // The signed event's keys share the JSON level of `event_id`.
+    inner: Signed<EventInner>,
+}
+
+#[pymethods]
+impl Event {
+    /// Returns the event's `room_id`.
+    #[getter]
+    fn room_id(&self) -> &str {
+        &self.inner.room_id
+    }
+
+    /// Serializes the signed event to a JSON string; fails if serialization fails.
+    fn get_pdu_json(&self) -> PyResult<String> {
+        // TODO: Do all the other things `get_pdu_json` does.
+        Ok(serde_json::to_string(&self.inner).context("serializing event to JSON")?)
+    }
+
+    /// Returns `content` converted to a Python object (converted anew on each access).
+    #[getter]
+    fn content(&self, py: Python) -> PyResult<PyObject> {
+        Ok(pythonize(py, &self.inner.content)?)
+    }
+
+    /// Returns the state key, or raises `AttributeError` if the event has none.
+    #[getter]
+    fn state_key(&self) -> PyResult<&str> {
+        self.inner.state_key.as_deref().ok_or_else(|| PyAttributeError::new_err("state_key"))
+    }
+}
+
+/// Parses a JSON event from `bytes` and derives its event ID from a SHA-256 hash.
+#[pyfunction]
+fn from_bytes(bytes: &PyBytes) -> PyResult<Event> {
+    let inner: Signed<EventInner> =
+        serde_json::from_slice(bytes.as_bytes()).context("parsing event")?;
+    // The hash input is the redacted event without `signatures` and `unsigned`.
+    let mut redacted: BTreeMap<String, Value> = redact(&inner).context("redacting")?;
+    redacted.remove("signatures");
+    redacted.remove("unsigned");
+    let redacted_json = serde_json::to_vec(&redacted).context("serializing redacted event")?;
+
+    // Hash-based event IDs are the `$` sigil followed by the URL-safe unpadded base64 hash.
+    // NOTE(review): this ID format applies to room versions 4+; older versions are not handled.
+    let digest = Sha256::digest(&redacted_json);
+    let event_id = format!("${}", base64::encode_config(digest, URL_SAFE_NO_PAD));
+    Ok(Event { event_id, inner })
+}
+
+/// Python module definition: registers `from_bytes` on the `synapse_events` module.
+#[pymodule]
+fn synapse_events(_py: Python, m: &PyModule) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(from_bytes, m)?)
+}
+
+/// Builds the redacted form of `event` and deserializes it into `E`.
+///
+/// Only allow-listed top-level keys are kept, and `content` is replaced by a map
+/// holding just the keys that are preserved for the event's type. Returns the
+/// `serde_json` error if serializing `event` or deserializing into `E` fails.
+fn redact<E: serde::de::DeserializeOwned>(
+    event: &Signed<EventInner>,
+) -> Result<E, serde_json::Error> {
+    // Top-level keys that survive redaction.
+    const KEPT_KEYS: &[&str] = &[
+        "event_id",
+        "sender",
+        "room_id",
+        "hashes",
+        "signatures",
+        "content",
+        "type",
+        "state_key",
+        "depth",
+        "prev_events",
+        "prev_state",
+        "auth_events",
+        "origin",
+        "origin_server_ts",
+        "membership",
+    ];
+
+    // `content` keys that survive redaction, chosen by event type.
+    let kept_content: &[&str] = match event.event_type.as_str() {
+        "m.room.member" => &["membership"],
+        "m.room.create" => &["creator"],
+        "m.room.join_rules" => &["join_rule"],
+        "m.room.aliases" => &["aliases"],
+        "m.room.history_visibility" => &["history_visibility"],
+        "m.room.power_levels" => &[
+            "ban",
+            "events",
+            "events_default",
+            "kick",
+            "redact",
+            "state_default",
+            "users",
+            "users_default",
+        ],
+        _ => &[],
+    };
+
+    // Go through a JSON value so the top-level keys can be filtered by name.
+    let fields = match serde_json::to_value(event)? {
+        Value::Object(fields) => fields,
+        _ => unreachable!(), // Events always serialize to an object
+    };
+
+    let mut pruned: serde_json::Map<String, Value> = fields
+        .into_iter()
+        .filter(|(name, _)| KEPT_KEYS.contains(&name.as_str()))
+        .collect();
+
+    // Move the preserved entries out of a copy of the original content.
+    let mut source = event.as_ref().content.clone();
+    let mut pruned_content = serde_json::Map::new();
+    for key in kept_content {
+        if let Some(value) = source.remove(*key) {
+            pruned_content.insert(key.to_string(), value);
+        }
+    }
+
+    pruned.insert(
+        "content".to_string(),
+        serde_json::Value::Object(pruned_content),
+    );
+
+    serde_json::from_value(Value::Object(pruned))
+}
diff --git a/parse_events.py b/parse_events.py
new file mode 100644
index 0000000000..e7ba2f7c9e
--- /dev/null
+++ b/parse_events.py
@@ -0,0 +1,50 @@
+import json
+import time
+from synapse.api.room_versions import RoomVersion, RoomVersions
+
+from synapse.events import make_event_from_dict
+
+import synapse_events
+
+with open("/home/erikj/git/synapse/hq_events", "rb") as f:
+    event_json = f.readlines()
+
+start = time.time()
+
+rust_events = []
+
+for e in event_json:
+    e = e.strip()
+    e = e.replace(b"\\\\", b"\\")
+    event = synapse_events.from_bytes(e)
+    rust_events.append(event)
+
+now = time.time()
+
+print(f"Parsed rust event in {now - start:.2f} seconds")
+
+event_dicts = []
+
+start = time.time()
+
+event_dicts = []
+for e in event_json:
+    e = e.strip()
+    e = e.replace(b"\\\\", b"\\")
+    event_dicts.append(json.loads(e.strip()))
+
+now = time.time()
+
+print(f"Parsed JSON in {now - start:.2f} seconds")
+
+events = []
+
+start = time.time()
+
+for e in event_dicts:
+    event = make_event_from_dict(e, RoomVersions.V5)
+    events.append(event)
+
+now = time.time()
+
+print(f"Parsed event in {now - start:.2f} seconds")
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 9acb3c0cc4..204e24de17 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -310,7 +310,9 @@ class EventBase(metaclass=abc.ABCMeta):
     depth: DictProperty[int] = DictProperty("depth")
     content: DictProperty[JsonDict] = DictProperty("content")
     hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
-    origin: DictProperty[str] = DictProperty("origin")
+    origin: DictProperty[str] = DictProperty(
+        "origin"
+    )  # TODO: can we get rid of `origin`?
     origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
     redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
     room_id: DictProperty[str] = DictProperty("room_id")