diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 1a14904194..d53569ca49 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -14,6 +14,10 @@
# limitations under the License.
import synapse.metrics
+from lrucache import LruCache
+import os
+
+CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1))
DEBUG_CACHES = False
@@ -25,3 +29,56 @@ cache_counter = metrics.register_cache(
lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()},
labels=["name"],
)
+
+_string_cache = LruCache(int(5000 * CACHE_SIZE_FACTOR))
+caches_by_name["string_cache"] = _string_cache
+
+
+KNOWN_KEYS = {
+ key: key for key in
+ (
+ "auth_events",
+ "content",
+ "depth",
+ "event_id",
+ "hashes",
+ "origin",
+ "origin_server_ts",
+ "prev_events",
+ "room_id",
+ "sender",
+ "signatures",
+ "state_key",
+ "type",
+ "unsigned",
+ "user_id",
+ )
+}
+
+
+def intern_string(string):
+ """Takes a (potentially) unicode string and interns using custom cache
+ """
+ return _string_cache.setdefault(string, string)
+
+
+def intern_dict(dictionary):
+ """Takes a dictionary and interns well known keys and their values
+ """
+ return {
+ KNOWN_KEYS.get(key, key): _intern_known_values(key, value)
+ for key, value in dictionary.items()
+ }
+
+
+def _intern_known_values(key, value):
+ intern_str_keys = ("event_id", "room_id")
+ intern_unicode_keys = ("sender", "user_id", "type", "state_key")
+
+ if key in intern_str_keys:
+ return intern(value.encode('ascii'))
+
+ if key in intern_unicode_keys:
+ return intern_string(value)
+
+ return value
|