From 86780a8bc3eac566d2c03a601f84b5ccf5737ceb Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 28 Mar 2017 10:41:08 +0100
Subject: Don't convert to deferreds when not necessary

---
 synapse/util/async.py              | 2 +-
 synapse/util/caches/descriptors.py | 5 ++++-
 synapse/util/logcontext.py         | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/async.py b/synapse/util/async.py
index 35380bf8ed..8495de496a 100644
--- a/synapse/util/async.py
+++ b/synapse/util/async.py
@@ -101,7 +101,7 @@ class ObservableDeferred(object):
             return d
         else:
             success, res = self._result
-            return defer.succeed(res) if success else defer.fail(res)
+            return res if success else defer.fail(res)
 
     def observers(self):
         return self._observers
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 5c30ed235d..1607978e29 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -341,7 +341,10 @@ class CacheDescriptor(_CacheDescriptorBase):
                 cache.set(cache_key, result_d, callback=invalidate_callback)
                 observer = result_d.observe()
 
-            return logcontext.make_deferred_yieldable(observer)
+            if isinstance(observer, defer.Deferred):
+                return logcontext.make_deferred_yieldable(observer)
+            else:
+                return observer
 
         wrapped.invalidate = cache.invalidate
         wrapped.invalidate_all = cache.invalidate_all
diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 857afee7cb..183d9cf62f 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -315,6 +315,9 @@ def preserve_context_over_deferred(deferred, context=None):
     the deferred follow the synapse logcontext rules: try
     ``make_deferred_yieldable`` instead.
     """
+    if not isinstance(deferred, defer.Deferred):
+        return deferred
+
     if context is None:
         context = LoggingContext.current_context()
     d = _PreservingContextDeferred(context)
-- 
cgit 1.5.1


From 014fee93b38ea93ee7dd9f9f9211895272e50834 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 28 Mar 2017 11:14:15 +0100
Subject: Manually calculate cache key as getcallargs is expensive

This is because getcallargs recomputes the getargspec, amongst other
things, which we don't need to do as its already been done
---
 synapse/util/caches/descriptors.py | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 1607978e29..eed60d567e 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -197,6 +197,7 @@ class _CacheDescriptorBase(object):
 
         arg_spec = inspect.getargspec(orig)
         all_args = arg_spec.args
+        self.arg_spec = arg_spec
 
         if "cache_context" in all_args:
             if not cache_context:
@@ -226,6 +227,14 @@ class _CacheDescriptorBase(object):
         self.num_args = num_args
         self.arg_names = all_args[1:num_args + 1]
 
+        if arg_spec.defaults:
+            self.arg_defaults = dict(zip(
+                all_args[-len(arg_spec.defaults):],
+                arg_spec.defaults
+            ))
+        else:
+            self.arg_defaults = {}
+
         if "cache_context" in self.arg_names:
             raise Exception(
                 "cache_context arg cannot be included among the cache keys"
@@ -289,18 +298,31 @@ class CacheDescriptor(_CacheDescriptorBase):
             iterable=self.iterable,
         )
 
+        def get_cache_key(args, kwargs):
+            """Given some args/kwargs return a generator that resolves into
+            the cache_key.
+
+            We loop through each arg name, looking up if its in the `kwargs`,
+            otherwise using the next argument in `args`. If there are no more
+            args then we try looking the arg name up in the defaults
+            """
+            pos = 0
+            for nm in self.arg_names:
+                if nm in kwargs:
+                    yield kwargs[nm]
+                elif pos < len(args):
+                    yield args[pos]
+                    pos += 1
+                else:
+                    yield self.arg_defaults[nm]
+
         @functools.wraps(self.orig)
         def wrapped(*args, **kwargs):
             # If we're passed a cache_context then we'll want to call its invalidate()
             # whenever we are invalidated
             invalidate_callback = kwargs.pop("on_invalidate", None)
 
-            # Add temp cache_context so inspect.getcallargs doesn't explode
-            if self.add_cache_context:
-                kwargs["cache_context"] = None
-
-            arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs)
-            cache_key = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names)
+            cache_key = tuple(get_cache_key(args, kwargs))
 
             # Add our own `cache_context` to argument list if the wrapped function
             # has asked for one
-- 
cgit 1.5.1


From 6194a64ae913aa400e19c3a9bd9348ce2bc11305 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 30 Mar 2017 14:19:10 +0100
Subject: Doc new instance variables

---
 synapse/util/caches/descriptors.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index eed60d567e..1f02cca8a5 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -197,7 +197,6 @@ class _CacheDescriptorBase(object):
 
         arg_spec = inspect.getargspec(orig)
         all_args = arg_spec.args
-        self.arg_spec = arg_spec
 
         if "cache_context" in all_args:
             if not cache_context:
@@ -225,8 +224,16 @@ class _CacheDescriptorBase(object):
             )
 
         self.num_args = num_args
+
+        # list of the names of the args used as the cache key
         self.arg_names = all_args[1:num_args + 1]
 
+        # The arg spec of the wrapped function, see `inspect.getargspec` for
+        # the type.
+        self.arg_spec = arg_spec
+
+        # self.arg_defaults is a map of arg name to its default value for each
+        # argument that has a default value
         if arg_spec.defaults:
             self.arg_defaults = dict(zip(
                 all_args[-len(arg_spec.defaults):],
-- 
cgit 1.5.1


From b282fe7170142191c8ce795270422754ab4bc58e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 30 Mar 2017 17:03:59 +0100
Subject: Revert log context change

---
 synapse/util/logcontext.py | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 183d9cf62f..857afee7cb 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -315,9 +315,6 @@ def preserve_context_over_deferred(deferred, context=None):
     the deferred follow the synapse logcontext rules: try
     ``make_deferred_yieldable`` instead.
     """
-    if not isinstance(deferred, defer.Deferred):
-        return deferred
-
     if context is None:
         context = LoggingContext.current_context()
     d = _PreservingContextDeferred(context)
-- 
cgit 1.5.1


From 5b5b171f3e9223351f72150ea73e1c9797144eae Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 30 Mar 2017 17:05:53 +0100
Subject: Docs

---
 synapse/util/async.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'synapse/util')

diff --git a/synapse/util/async.py b/synapse/util/async.py
index 8495de496a..1453faf0ef 100644
--- a/synapse/util/async.py
+++ b/synapse/util/async.py
@@ -89,6 +89,11 @@ class ObservableDeferred(object):
         deferred.addCallbacks(callback, errback)
 
     def observe(self):
+        """Observe the underlying deferred.
+
+        Can return either a deferred if the underlying deferred is still pending
+        (or has failed), or the actual value. Callers may need to use maybeDeferred.
+        """
         if not self._result:
             d = defer.Deferred()
 
-- 
cgit 1.5.1


From 4d17add8de6d1c3bcd073246519f3cdaa5063bed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 31 Mar 2017 09:38:27 +0100
Subject: Remove unused instance variable

---
 synapse/util/caches/descriptors.py | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 1f02cca8a5..9d0d0be1f9 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -228,10 +228,6 @@ class _CacheDescriptorBase(object):
         # list of the names of the args used as the cache key
         self.arg_names = all_args[1:num_args + 1]
 
-        # The arg spec of the wrapped function, see `inspect.getargspec` for
-        # the type.
-        self.arg_spec = arg_spec
-
         # self.arg_defaults is a map of arg name to its default value for each
         # argument that has a default value
         if arg_spec.defaults:
-- 
cgit 1.5.1


From e2eebf16963d9580a581a15308d2771dce875a83 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 3 Apr 2017 15:38:02 +0100
Subject: Fix fixme in preserve_fn

`preserve_fn` is no longer used as a decorator anywhere, so we can safely fix a
fixme therein.
---
 synapse/util/logcontext.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 857afee7cb..990216145e 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -334,12 +334,8 @@ def preserve_fn(f):
         LoggingContext.set_current_context(LoggingContext.sentinel)
         return result
 
-    # XXX: why is this here rather than inside g? surely we want to preserve
-    # the context from the time the function was called, not when it was
-    # wrapped?
-    current = LoggingContext.current_context()
-
     def g(*args, **kwargs):
+        current = LoggingContext.current_context()
         res = f(*args, **kwargs)
         if isinstance(res, defer.Deferred) and not res.called:
             # The function will have reset the context before returning, so
-- 
cgit 1.5.1


From d134d0935e20add0dca69a3e0787e9629763ac78 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 24 Apr 2017 14:07:27 +0100
Subject: Only intern ascii strings

---
 synapse/util/caches/__init__.py | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 8a7774a88e..9fd35a8134 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import synapse.metrics
-from lrucache import LruCache
 import os
 
 CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1))
@@ -40,10 +39,6 @@ def register_cache(name, cache):
     )
 
 
-_string_cache = LruCache(int(100000 * CACHE_SIZE_FACTOR))
-_stirng_cache_metrics = register_cache("string_cache", _string_cache)
-
-
 KNOWN_KEYS = {
     key: key for key in
     (
@@ -67,14 +62,16 @@ KNOWN_KEYS = {
 
 
 def intern_string(string):
-    """Takes a (potentially) unicode string and interns using custom cache
+    """Takes a (potentially) unicode string and interns it if it's ascii
     """
-    new_str = _string_cache.setdefault(string, string)
-    if new_str is string:
-        _stirng_cache_metrics.inc_hits()
-    else:
-        _stirng_cache_metrics.inc_misses()
-    return new_str
+    if string is None:
+        return None
+
+    try:
+        string = string.encode("ascii")
+        return intern(string)
+    except UnicodeEncodeError:
+        return string
 
 
 def intern_dict(dictionary):
@@ -87,13 +84,9 @@ def intern_dict(dictionary):
 
 
 def _intern_known_values(key, value):
-    intern_str_keys = ("event_id", "room_id")
-    intern_unicode_keys = ("sender", "user_id", "type", "state_key")
-
-    if key in intern_str_keys:
-        return intern(value.encode('ascii'))
+    intern_keys = ("event_id", "room_id", "sender", "user_id", "type", "state_key",)
 
-    if key in intern_unicode_keys:
+    if key in intern_keys:
         return intern_string(value)
 
     return value
-- 
cgit 1.5.1


From 119cb9bbcf429321d27cbc1422049974e2ad8982 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 25 Apr 2017 10:23:11 +0100
Subject: Reduce cache size by not storing deferreds

Currently the cache descriptors store deferreds rather than raw values,
this is a simple way of triggering only one database hit and sharing the
result if two callers attempt to get the same value.

However, there are a few caches that simply store a mapping from string
to string (or int). These caches can have a large number of entries,
under the assumption that each entry is small. However, the size of a
deferred (specifically the size of ObservableDeferred) is signigicantly
larger than that of the raw value, 2kb vs 32b.

This PR therefore changes the cache descriptors to store the raw values
rather than the deferreds.

As a side effect cached storage function now either return a deferred or
the actual value, as the cached list decriptor already does. This is
fine as we always end up just yield'ing on the returned value
eventually, which handles that case correctly.
---
 synapse/storage/receipts.py        | 11 +++++++----
 synapse/util/caches/descriptors.py | 39 ++++++++++++++++++++------------------
 2 files changed, 28 insertions(+), 22 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 6b0f8c2787..efb90c3c91 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -47,10 +47,13 @@ class ReceiptsStore(SQLBaseStore):
         # Returns an ObservableDeferred
         res = self.get_users_with_read_receipts_in_room.cache.get((room_id,), None)
 
-        if res and res.called and user_id in res.result:
-            # We'd only be adding to the set, so no point invalidating if the
-            # user is already there
-            return
+        if res:
+            if isinstance(res, defer.Deferred) and res.called:
+                res = res.result
+            if user_id in res:
+                # We'd only be adding to the set, so no point invalidating if the
+                # user is already there
+                return
 
         self.get_users_with_read_receipts_in_room.invalidate((room_id,))
 
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 9d0d0be1f9..807e147657 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -19,7 +19,7 @@ from synapse.util import unwrapFirstError, logcontext
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 
-from . import DEBUG_CACHES, register_cache
+from . import register_cache
 
 from twisted.internet import defer
 from collections import namedtuple
@@ -76,7 +76,7 @@ class Cache(object):
 
         self.cache = LruCache(
             max_size=max_entries, keylen=keylen, cache_type=cache_type,
-            size_callback=(lambda d: len(d.result)) if iterable else None,
+            size_callback=(lambda d: len(d)) if iterable else None,
         )
 
         self.name = name
@@ -96,6 +96,17 @@ class Cache(object):
                 )
 
     def get(self, key, default=_CacheSentinel, callback=None):
+        """Looks the key up in the caches.
+
+        Args:
+            key(tuple)
+            default: What is returned if key is not in the caches. If not
+                specified then function throws KeyError instead
+            callback(fn): Gets called when the entry in the cache is invalidated
+
+        Returns:
+            Either a Deferred or the raw result
+        """
         callbacks = [callback] if callback else []
         val = self._pending_deferred_cache.get(key, _CacheSentinel)
         if val is not _CacheSentinel:
@@ -137,7 +148,7 @@ class Cache(object):
             if self.sequence == entry.sequence:
                 existing_entry = self._pending_deferred_cache.pop(key, None)
                 if existing_entry is entry:
-                    self.cache.set(key, entry.deferred, entry.callbacks)
+                    self.cache.set(key, result, entry.callbacks)
                 else:
                     entry.invalidate()
             else:
@@ -335,20 +346,10 @@ class CacheDescriptor(_CacheDescriptorBase):
             try:
                 cached_result_d = cache.get(cache_key, callback=invalidate_callback)
 
-                observer = cached_result_d.observe()
-                if DEBUG_CACHES:
-                    @defer.inlineCallbacks
-                    def check_result(cached_result):
-                        actual_result = yield self.function_to_call(obj, *args, **kwargs)
-                        if actual_result != cached_result:
-                            logger.error(
-                                "Stale cache entry %s%r: cached: %r, actual %r",
-                                self.orig.__name__, cache_key,
-                                cached_result, actual_result,
-                            )
-                            raise ValueError("Stale cache entry")
-                        defer.returnValue(cached_result)
-                    observer.addCallback(check_result)
+                if isinstance(cached_result_d, ObservableDeferred):
+                    observer = cached_result_d.observe()
+                else:
+                    observer = cached_result_d
 
             except KeyError:
                 ret = defer.maybeDeferred(
@@ -447,7 +448,9 @@ class CacheListDescriptor(_CacheDescriptorBase):
 
                 try:
                     res = cache.get(tuple(key), callback=invalidate_callback)
-                    if not res.has_succeeded():
+                    if not isinstance(res, ObservableDeferred):
+                        results[arg] = res
+                    elif not res.has_succeeded():
                         res = res.observe()
                         res.addCallback(lambda r, arg: (arg, r), arg)
                         cached_defers[arg] = res
-- 
cgit 1.5.1


From efab1dadde8ce6305bf0960bf70aaeef54b42db6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 25 Apr 2017 10:54:09 +0100
Subject: Remove DEBUG_CACHES

---
 synapse/util/caches/__init__.py | 2 --
 1 file changed, 2 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 9fd35a8134..4a83c46d98 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -18,8 +18,6 @@ import os
 
 CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1))
 
-DEBUG_CACHES = False
-
 metrics = synapse.metrics.get_metrics_for("synapse.util.caches")
 
 caches_by_name = {}
-- 
cgit 1.5.1


From d9aa645f86db733bb0419b5f5428ba9a9c799735 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 25 Apr 2017 14:38:51 +0100
Subject: Reduce size of joined_user cache

The _get_joined_users_from_context cache stores a mapping from user_id
to avatar_url and display_name. Instead of storing those in a dict,
store them in a namedtuple as that uses much less memory.

We also try converting the string to ascii to further reduce the size.
---
 synapse/push/bulk_push_rule_evaluator.py |  7 +++++--
 synapse/rest/client/v1/room.py           |  8 +++++++-
 synapse/storage/roommember.py            | 22 ++++++++++++++--------
 synapse/util/stringutils.py              | 14 ++++++++++++++
 4 files changed, 40 insertions(+), 11 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 78b095c903..503fef4261 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -87,8 +87,11 @@ class BulkPushRuleEvaluator:
         condition_cache = {}
 
         for uid, rules in self.rules_by_user.items():
-            display_name = room_members.get(uid, {}).get("display_name", None)
-            if not display_name:
+            display_name = None
+            profile_info = room_members.get(uid, {})
+            if profile_info:
+                display_name = profile_info.display_name
+            else:
                 # Handle the case where we are pushing a membership event to
                 # that user, as they might not be already joined.
                 if event.type == EventTypes.Member and event.state_key == uid:
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 0bdd6b5b36..c376ab8fd7 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -406,7 +406,13 @@ class JoinedRoomMemberListRestServlet(ClientV1RestServlet):
         users_with_profile = yield self.state.get_current_user_in_room(room_id)
 
         defer.returnValue((200, {
-            "joined": users_with_profile
+            "joined": {
+                user_id: {
+                    "avatar_url": profile.avatar_url,
+                    "display_name": profile.display_name,
+                }
+                for user_id, profile in users_with_profile.iteritems()
+            }
         }))
 
 
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 367dbbbcf6..68e724ac7f 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -19,6 +19,7 @@ from collections import namedtuple
 
 from ._base import SQLBaseStore
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+from synapse.util.stringutils import to_ascii
 
 from synapse.api.constants import Membership, EventTypes
 from synapse.types import get_domain_from_id
@@ -35,6 +36,11 @@ RoomsForUser = namedtuple(
 )
 
 
+ProfileInfo = namedtuple(
+    "ProfileInfo", ("avatar_url", "display_name")
+)
+
+
 _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
 
 
@@ -422,20 +428,20 @@ class RoomMemberStore(SQLBaseStore):
         )
 
         users_in_room = {
-            row["user_id"]: {
-                "display_name": row["display_name"],
-                "avatar_url": row["avatar_url"],
-            }
+            to_ascii(row["user_id"]): ProfileInfo(
+                avatar_url=to_ascii(row["avatar_url"]),
+                display_name=to_ascii(row["display_name"]),
+            )
             for row in rows
         }
 
         if event is not None and event.type == EventTypes.Member:
             if event.membership == Membership.JOIN:
                 if event.event_id in member_event_ids:
-                    users_in_room[event.state_key] = {
-                        "display_name": event.content.get("displayname", None),
-                        "avatar_url": event.content.get("avatar_url", None),
-                    }
+                    users_in_room[to_ascii(event.state_key)] = ProfileInfo(
+                        display_name=to_ascii(event.content.get("displayname", None)),
+                        avatar_url=to_ascii(event.content.get("avatar_url", None)),
+                    )
 
         defer.returnValue(users_in_room)
 
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index a100f151d4..95a6168e16 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -40,3 +40,17 @@ def is_ascii(s):
         return False
     else:
         return True
+
+
+def to_ascii(s):
+    """Converts a string to ascii if it is ascii, otherwise leave it alone.
+
+    If given None then will return None.
+    """
+    if s is None:
+        return None
+
+    try:
+        return s.encode("ascii")
+    except UnicodeEncodeError:
+        return s
-- 
cgit 1.5.1


From 2e996271fe7c7acc828e19535ba2eec0b503f9e3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 2 May 2017 13:26:17 +0100
Subject: Instantiate DeferredTimedOutError correctly

Call `super` correctly, so that we correctly initialise the `errcode` field.

Fixes https://github.com/matrix-org/synapse/issues/2179.
---
 synapse/util/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/util')

diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index 98a5a26ac5..2a2360ab5d 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
 
 class DeferredTimedOutError(SynapseError):
     def __init__(self):
-        super(SynapseError, self).__init__(504, "Timed out")
+        super(DeferredTimedOutError, self).__init__(504, "Timed out")
 
 
 def unwrapFirstError(failure):
-- 
cgit 1.5.1


From d2d8ed48848fd61b34b4327117bdf0d0e84ea285 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 4 May 2017 14:18:46 +0100
Subject: Optimise caches with single key

---
 synapse/util/caches/descriptors.py | 42 ++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 9 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 807e147657..aa182eeac7 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -18,6 +18,7 @@ from synapse.util.async import ObservableDeferred
 from synapse.util import unwrapFirstError, logcontext
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
+from synapse.util.stringutils import to_ascii
 
 from . import register_cache
 
@@ -163,10 +164,6 @@ class Cache(object):
 
     def invalidate(self, key):
         self.check_thread()
-        if not isinstance(key, tuple):
-            raise TypeError(
-                "The cache key must be a tuple not %r" % (type(key),)
-            )
 
         # Increment the sequence number so that any SELECT statements that
         # raced with the INSERT don't update the cache (SYN-369)
@@ -312,7 +309,7 @@ class CacheDescriptor(_CacheDescriptorBase):
             iterable=self.iterable,
         )
 
-        def get_cache_key(args, kwargs):
+        def get_cache_key_gen(args, kwargs):
             """Given some args/kwargs return a generator that resolves into
             the cache_key.
 
@@ -330,13 +327,29 @@ class CacheDescriptor(_CacheDescriptorBase):
                 else:
                     yield self.arg_defaults[nm]
 
+        # By default our cache key is a tuple, but if there is only one item
+        # then don't bother wrapping in a tuple.  This is to save memory.
+        if self.num_args == 1:
+            nm = self.arg_names[0]
+
+            def get_cache_key(args, kwargs):
+                if nm in kwargs:
+                    return kwargs[nm]
+                elif len(args):
+                    return args[0]
+                else:
+                    return self.arg_defaults[nm]
+        else:
+            def get_cache_key(args, kwargs):
+                return tuple(get_cache_key_gen(args, kwargs))
+
         @functools.wraps(self.orig)
         def wrapped(*args, **kwargs):
             # If we're passed a cache_context then we'll want to call its invalidate()
             # whenever we are invalidated
             invalidate_callback = kwargs.pop("on_invalidate", None)
 
-            cache_key = tuple(get_cache_key(args, kwargs))
+            cache_key = get_cache_key(args, kwargs)
 
             # Add our own `cache_context` to argument list if the wrapped function
             # has asked for one
@@ -363,6 +376,11 @@ class CacheDescriptor(_CacheDescriptorBase):
 
                 ret.addErrback(onErr)
 
+                # If our cache_key is a string, try to convert to ascii to save
+                # a bit of space in large caches
+                if isinstance(cache_key, basestring):
+                    cache_key = to_ascii(cache_key)
+
                 result_d = ObservableDeferred(ret, consumeErrors=True)
                 cache.set(cache_key, result_d, callback=invalidate_callback)
                 observer = result_d.observe()
@@ -372,10 +390,16 @@ class CacheDescriptor(_CacheDescriptorBase):
             else:
                 return observer
 
-        wrapped.invalidate = cache.invalidate
+        if self.num_args == 1:
+            wrapped.invalidate = lambda key: cache.invalidate(key[0])
+            wrapped.prefill = lambda key, val: cache.prefill(key[0], val)
+        else:
+            wrapped.invalidate = cache.invalidate
+            wrapped.invalidate_all = cache.invalidate_all
+            wrapped.invalidate_many = cache.invalidate_many
+            wrapped.prefill = cache.prefill
+
         wrapped.invalidate_all = cache.invalidate_all
-        wrapped.invalidate_many = cache.invalidate_many
-        wrapped.prefill = cache.prefill
         wrapped.cache = cache
 
         obj.__dict__[self.orig.__name__] = wrapped
-- 
cgit 1.5.1


From ffad4fe35be3baba5b2fffaa4e9b31f3008d09af Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 8 May 2017 16:06:17 +0100
Subject: Don't update event cache hit ratio from get_joined_users

Otherwise the hit ration of plain get_events gets completely skewed by
calls to get_joined_users* functions.
---
 synapse/storage/events.py          | 13 +++++++++++--
 synapse/storage/roommember.py      |  4 ++++
 synapse/util/caches/descriptors.py |  9 ++++++---
 3 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'synapse/util')

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 98707d40ee..d944984d61 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1343,11 +1343,20 @@ class EventsStore(SQLBaseStore):
     def _invalidate_get_event_cache(self, event_id):
             self._get_event_cache.invalidate((event_id,))
 
-    def _get_events_from_cache(self, events, allow_rejected):
+    def _get_events_from_cache(self, events, allow_rejected, update_metrics=True):
+        """
+        Args:
+            events (list(str)): list of event_ids to fetch
+            allow_rejected (bool): Whether to teturn events that were rejected
+            update_metrics (bool): Whether to update the cache hit ratio metrics
+        """
         event_map = {}
 
         for event_id in events:
-            ret = self._get_event_cache.get((event_id,), None)
+            ret = self._get_event_cache.get(
+                (event_id,), None,
+                update_metrics=update_metrics,
+            )
             if not ret:
                 continue
 
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index ad3c9b06d9..2fa20bd87c 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -421,9 +421,13 @@ class RoomMemberStore(SQLBaseStore):
         # We check if we have any of the member event ids in the event cache
         # before we ask the DB
 
+        # We don't update the event cache hit ratio as it completely throws off
+        # the hit ratio counts. After all, we don't populate the cache if we
+        # miss it here
         event_map = self._get_events_from_cache(
             member_event_ids,
             allow_rejected=False,
+            update_metrics=False,
         )
 
         missing_member_event_ids = []
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index aa182eeac7..48dcbafeef 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -96,7 +96,7 @@ class Cache(object):
                     "Cache objects can only be accessed from the main thread"
                 )
 
-    def get(self, key, default=_CacheSentinel, callback=None):
+    def get(self, key, default=_CacheSentinel, callback=None, update_metrics=True):
         """Looks the key up in the caches.
 
         Args:
@@ -104,6 +104,7 @@ class Cache(object):
             default: What is returned if key is not in the caches. If not
                 specified then function throws KeyError instead
             callback(fn): Gets called when the entry in the cache is invalidated
+            update_metrics (bool): whether to update the cache hit rate metrics
 
         Returns:
             Either a Deferred or the raw result
@@ -113,7 +114,8 @@ class Cache(object):
         if val is not _CacheSentinel:
             if val.sequence == self.sequence:
                 val.callbacks.update(callbacks)
-                self.metrics.inc_hits()
+                if update_metrics:
+                    self.metrics.inc_hits()
                 return val.deferred
 
         val = self.cache.get(key, _CacheSentinel, callbacks=callbacks)
@@ -121,7 +123,8 @@ class Cache(object):
             self.metrics.inc_hits()
             return val
 
-        self.metrics.inc_misses()
+        if update_metrics:
+            self.metrics.inc_misses()
 
         if default is _CacheSentinel:
             raise KeyError()
-- 
cgit 1.5.1