diff options
-rw-r--r-- | README.rst | 92 | ||||
-rw-r--r-- | synapse/handlers/federation.py | 8 | ||||
-rw-r--r-- | synapse/handlers/identity.py | 2 | ||||
-rw-r--r-- | synapse/handlers/message.py | 6 | ||||
-rw-r--r-- | synapse/handlers/register.py | 3 | ||||
-rw-r--r-- | synapse/handlers/sync.py | 6 | ||||
-rw-r--r-- | synapse/rest/client/v2_alpha/register.py | 127 | ||||
-rw-r--r-- | synapse/rest/media/v1/base_resource.py | 77 | ||||
-rw-r--r-- | synapse/state.py | 10 | ||||
-rw-r--r-- | synapse/storage/_base.py | 49 | ||||
-rw-r--r-- | synapse/storage/roommember.py | 4 | ||||
-rw-r--r-- | synapse/storage/state.py | 267 | ||||
-rw-r--r-- | synapse/storage/stream.py | 3 | ||||
-rw-r--r-- | synapse/util/dictionary_cache.py | 106 | ||||
-rw-r--r-- | tests/rest/client/v2_alpha/test_register.py | 134 | ||||
-rw-r--r-- | tests/test_state.py | 2 | ||||
-rw-r--r-- | tests/util/test_dict_cache.py | 101 |
17 files changed, 758 insertions, 239 deletions
diff --git a/README.rst b/README.rst index 21e3479687..5ff53f2df7 100644 --- a/README.rst +++ b/README.rst @@ -101,25 +101,26 @@ header files for python C extensions. Installing prerequisites on Ubuntu or Debian:: - $ sudo apt-get install build-essential python2.7-dev libffi-dev \ - python-pip python-setuptools sqlite3 \ - libssl-dev python-virtualenv libjpeg-dev + sudo apt-get install build-essential python2.7-dev libffi-dev \ + python-pip python-setuptools sqlite3 \ + libssl-dev python-virtualenv libjpeg-dev Installing prerequisites on ArchLinux:: - $ sudo pacman -S base-devel python2 python-pip \ - python-setuptools python-virtualenv sqlite3 + sudo pacman -S base-devel python2 python-pip \ + python-setuptools python-virtualenv sqlite3 Installing prerequisites on Mac OS X:: - $ xcode-select --install - $ sudo pip install virtualenv + xcode-select --install + sudo easy_install pip + sudo pip install virtualenv To install the synapse homeserver run:: - $ virtualenv -p python2.7 ~/.synapse - $ source ~/.synapse/bin/activate - $ pip install --process-dependency-links https://github.com/matrix-org/synapse/tarball/master + virtualenv -p python2.7 ~/.synapse + source ~/.synapse/bin/activate + pip install --process-dependency-links https://github.com/matrix-org/synapse/tarball/master This installs synapse, along with the libraries it uses, into a virtual environment under ``~/.synapse``. Feel free to pick a different directory @@ -132,8 +133,8 @@ above in Docker at https://registry.hub.docker.com/u/silviof/docker-matrix/. To set up your homeserver, run (in your virtualenv, as before):: - $ cd ~/.synapse - $ python -m synapse.app.homeserver \ + cd ~/.synapse + python -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config @@ -192,9 +193,9 @@ Running Synapse To actually run your new homeserver, pick a working directory for Synapse to run (e.g. ``~/.synapse``), and:: - $ cd ~/.synapse - $ source ./bin/activate - $ synctl start + cd ~/.synapse + source ./bin/activate + synctl start Platform Specific Instructions ============================== @@ -212,12 +213,12 @@ defaults to python 3, but synapse currently assumes python 2.7 by default: pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ):: - $ sudo pip2.7 install --upgrade pip + sudo pip2.7 install --upgrade pip You also may need to explicitly specify python 2.7 again during the install request:: - $ pip2.7 install --process-dependency-links \ + pip2.7 install --process-dependency-links \ https://github.com/matrix-org/synapse/tarball/master If you encounter an error with lib bcrypt causing an Wrong ELF Class: @@ -225,13 +226,13 @@ ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly compile it under the right architecture. (This should not be needed if installing under virtualenv):: - $ sudo pip2.7 uninstall py-bcrypt - $ sudo pip2.7 install py-bcrypt + sudo pip2.7 uninstall py-bcrypt + sudo pip2.7 install py-bcrypt During setup of Synapse you need to call python2.7 directly again:: - $ cd ~/.synapse - $ python2.7 -m synapse.app.homeserver \ + cd ~/.synapse + python2.7 -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config @@ -279,22 +280,22 @@ Synapse requires pip 1.7 or later, so if your OS provides too old a version and you get errors about ``error: no such option: --process-dependency-links`` you may need to manually upgrade it:: - $ sudo pip install --upgrade pip + sudo pip install --upgrade pip If pip crashes mid-installation for reason (e.g. lost terminal), pip may refuse to run until you remove the temporary installation directory it created. To reset the installation:: - $ rm -rf /tmp/pip_install_matrix + rm -rf /tmp/pip_install_matrix pip seems to leak *lots* of memory during installation. For instance, a Linux host with 512MB of RAM may run out of memory whilst installing Twisted. If this happens, you will have to individually install the dependencies which are failing, e.g.:: - $ pip install twisted + pip install twisted -On OSX, if you encounter clang: error: unknown argument: '-mno-fused-madd' you +On OS X, if you encounter clang: error: unknown argument: '-mno-fused-madd' you will need to export CFLAGS=-Qunused-arguments. Troubleshooting Running @@ -310,10 +311,11 @@ correctly, causing all tests to fail with errors about missing "sodium.h". To fix try re-installing from PyPI or directly from (https://github.com/pyca/pynacl):: - $ # Install from PyPI - $ pip install --user --upgrade --force pynacl - $ # Install from github - $ pip install --user https://github.com/pyca/pynacl/tarball/master + # Install from PyPI + pip install --user --upgrade --force pynacl + + # Install from github + pip install --user https://github.com/pyca/pynacl/tarball/master ArchLinux ~~~~~~~~~ @@ -321,7 +323,7 @@ ArchLinux If running `$ synctl start` fails with 'returned non-zero exit status 1', you will need to explicitly call Python2.7 - either running as:: - $ python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml + python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml ...or by editing synctl with the correct python executable. @@ -331,16 +333,16 @@ Synapse Development To check out a synapse for development, clone the git repo into a working directory of your choice:: - $ git clone https://github.com/matrix-org/synapse.git - $ cd synapse + git clone https://github.com/matrix-org/synapse.git + cd synapse Synapse has a number of external dependencies, that are easiest to install using pip and a virtualenv:: - $ virtualenv env - $ source env/bin/activate - $ python synapse/python_dependencies.py | xargs -n1 pip install - $ pip install setuptools_trial mock + virtualenv env + source env/bin/activate + python synapse/python_dependencies.py | xargs -n1 pip install + pip install setuptools_trial mock This will run a process of downloading and installing all the needed dependencies into a virtual env. @@ -348,7 +350,7 @@ dependencies into a virtual env. Once this is done, you may wish to run Synapse's unit tests, to check that everything is installed as it should be:: - $ python setup.py test + python setup.py test This should end with a 'PASSED' result:: @@ -389,11 +391,11 @@ IDs: For the first form, simply pass the required hostname (of the machine) as the --server-name parameter:: - $ python -m synapse.app.homeserver \ + python -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config - $ python -m synapse.app.homeserver --config-path homeserver.yaml + python -m synapse.app.homeserver --config-path homeserver.yaml Alternatively, you can run ``synctl start`` to guide you through the process. @@ -410,11 +412,11 @@ record would then look something like:: At this point, you should then run the homeserver with the hostname of this SRV record, as that is the name other machines will expect it to have:: - $ python -m synapse.app.homeserver \ + python -m synapse.app.homeserver \ --server-name YOURDOMAIN \ --config-path homeserver.yaml \ --generate-config - $ python -m synapse.app.homeserver --config-path homeserver.yaml + python -m synapse.app.homeserver --config-path homeserver.yaml You may additionally want to pass one or more "-v" options, in order to @@ -428,7 +430,7 @@ private federation (``localhost:8080``, ``localhost:8081`` and ``localhost:8082``) which you can then access through the webclient running at http://localhost:8080. Simply run:: - $ demo/start.sh + demo/start.sh This is mainly useful just for development purposes. @@ -502,10 +504,10 @@ Building Internal API Documentation Before building internal API documentation install sphinx and sphinxcontrib-napoleon:: - $ pip install sphinx - $ pip install sphinxcontrib-napoleon + pip install sphinx + pip install sphinxcontrib-napoleon Building internal API documentation:: - $ python setup.py build_sphinx + python setup.py build_sphinx diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f7155fd8d3..90649af9e1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -230,7 +230,11 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, None), + ) ) events_and_states = zip(events, states) @@ -503,7 +507,7 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) states = yield defer.gatherResults([ - self.state_handler.resolve_state_groups([e]) + self.state_handler.resolve_state_groups(room_id, [e]) for e in event_ids ]) states = dict(zip(event_ids, [s[1] for s in states])) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 6200e10775..c1095708a0 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -44,7 +44,7 @@ class IdentityHandler(BaseHandler): http_client = SimpleHttpClient(self.hs) # XXX: make this configurable! # trustedIdServers = ['matrix.org', 'localhost:8090'] - trustedIdServers = ['matrix.org'] + trustedIdServers = ['matrix.org', 'vector.im'] if 'id_server' in creds: id_server = creds['id_server'] diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9d6d4f0978..765b14d994 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -138,7 +138,11 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) ) events_and_states = zip(events, states) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index a1288b4252..f81d75017d 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -73,7 +73,8 @@ class RegistrationHandler(BaseHandler): localpart : The local part of the user ID to register. If None, one will be randomly generated. password (str) : The password to assign to this user so they can - login again. + login again. This can be None which means they cannot login again + via a password (e.g. the user is an application service user). Returns: A tuple of (user_id, access_token). Raises: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6cff6230c1..8f58774b31 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -295,7 +295,11 @@ class SyncHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) ) events_and_states = zip(events, states) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 0c737d73b8..b5926f9ca6 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -19,7 +19,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import SynapseError, Codes from synapse.http.servlet import RestServlet -from ._base import client_v2_pattern, parse_request_allow_empty +from ._base import client_v2_pattern, parse_json_dict_from_request import logging import hmac @@ -55,30 +55,55 @@ class RegisterRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): yield run_on_reactor() + body = parse_json_dict_from_request(request) - body = parse_request_allow_empty(request) - # we do basic sanity checks here because the auth - # layer will store these in sessions + # we do basic sanity checks here because the auth layer will store these + # in sessions. Pull out the username/password provided to us. + desired_password = None if 'password' in body: - if ((not isinstance(body['password'], str) and - not isinstance(body['password'], unicode)) or + if (not isinstance(body['password'], basestring) or len(body['password']) > 512): raise SynapseError(400, "Invalid password") + desired_password = body["password"] + desired_username = None if 'username' in body: - if ((not isinstance(body['username'], str) and - not isinstance(body['username'], unicode)) or + if (not isinstance(body['username'], basestring) or len(body['username']) > 512): raise SynapseError(400, "Invalid username") desired_username = body['username'] - yield self.registration_handler.check_username(desired_username) - - is_using_shared_secret = False - is_application_server = False - service = None + appservice = None if 'access_token' in request.args: - service = yield self.auth.get_appservice_by_req(request) + appservice = yield self.auth.get_appservice_by_req(request) + + # fork off as soon as possible for ASes and shared secret auth which + # have completely different registration flows to normal users + + # == Application Service Registration == + if appservice: + result = yield self._do_appservice_registration( + desired_username, request.args["access_token"][0] + ) + defer.returnValue((200, result)) # we throw for non 200 responses + return + + # == Shared Secret Registration == (e.g. create new user scripts) + if 'mac' in body: + # FIXME: Should we really be determining if this is shared secret + # auth based purely on the 'mac' key? + result = yield self._do_shared_secret_registration( + desired_username, desired_password, body["mac"] + ) + defer.returnValue((200, result)) # we throw for non 200 responses + return + + # == Normal User Registration == (everyone else) + if self.hs.config.disable_registration: + raise SynapseError(403, "Registration has been disabled") + + if desired_username is not None: + yield self.registration_handler.check_username(desired_username) if self.hs.config.enable_registration_captcha: flows = [ @@ -91,39 +116,20 @@ class RegisterRestServlet(RestServlet): [LoginType.EMAIL_IDENTITY] ] - result = None - if service: - is_application_server = True - params = body - elif 'mac' in body: - # Check registration-specific shared secret auth - if 'username' not in body: - raise SynapseError(400, "", Codes.MISSING_PARAM) - self._check_shared_secret_auth( - body['username'], body['mac'] - ) - is_using_shared_secret = True - params = body - else: - authed, result, params = yield self.auth_handler.check_auth( - flows, body, self.hs.get_ip_from_request(request) - ) - - if not authed: - defer.returnValue((401, result)) - - can_register = ( - not self.hs.config.disable_registration - or is_application_server - or is_using_shared_secret + authed, result, params = yield self.auth_handler.check_auth( + flows, body, self.hs.get_ip_from_request(request) ) - if not can_register: - raise SynapseError(403, "Registration has been disabled") + if not authed: + defer.returnValue((401, result)) + return + + # NB: This may be from the auth handler and NOT from the POST if 'password' not in params: - raise SynapseError(400, "", Codes.MISSING_PARAM) - desired_username = params['username'] if 'username' in params else None - new_password = params['password'] + raise SynapseError(400, "Missing password.", Codes.MISSING_PARAM) + + desired_username = params.get("username", None) + new_password = params.get("password", None) (user_id, token) = yield self.registration_handler.register( localpart=desired_username, @@ -156,18 +162,21 @@ class RegisterRestServlet(RestServlet): else: logger.info("bind_email not specified: not binding email") - result = { - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - } - + result = self._create_registration_details(user_id, token) defer.returnValue((200, result)) def on_OPTIONS(self, _): return 200, {} - def _check_shared_secret_auth(self, username, mac): + @defer.inlineCallbacks + def _do_appservice_registration(self, username, as_token): + (user_id, token) = yield self.registration_handler.appservice_register( + username, as_token + ) + defer.returnValue(self._create_registration_details(user_id, token)) + + @defer.inlineCallbacks + def _do_shared_secret_registration(self, username, password, mac): if not self.hs.config.registration_shared_secret: raise SynapseError(400, "Shared secret registration is not enabled") @@ -183,13 +192,23 @@ class RegisterRestServlet(RestServlet): digestmod=sha1, ).hexdigest() - if compare_digest(want_mac, got_mac): - return True - else: + if not compare_digest(want_mac, got_mac): raise SynapseError( 403, "HMAC incorrect", ) + (user_id, token) = yield self.registration_handler.register( + localpart=username, password=password + ) + defer.returnValue(self._create_registration_details(user_id, token)) + + def _create_registration_details(self, user_id, token): + return { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + } + def register_servlets(hs, http_server): RegisterRestServlet(hs).register(http_server) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index c43ae0314b..84e1961a21 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -244,43 +244,52 @@ class BaseMediaResource(Resource): ) return - scales = set() - crops = set() - for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, + local_thumbnails = [] + + def generate_thumbnails(): + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len + )) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) + yield threads.deferToThread(generate_thumbnails) + + for l in local_thumbnails: + yield self.store.store_local_thumbnail(*l) defer.returnValue({ "width": m_width, diff --git a/synapse/state.py b/synapse/state.py index 80da90a72c..b5e5d7bbda 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -96,7 +96,7 @@ class StateHandler(object): cache.ts = self.clock.time_msec() state = cache.state else: - res = yield self.resolve_state_groups(event_ids) + res = yield self.resolve_state_groups(room_id, event_ids) state = res[1] if event_type: @@ -155,13 +155,13 @@ class StateHandler(object): if event.is_state(): ret = yield self.resolve_state_groups( - [e for e, _ in event.prev_events], + event.room_id, [e for e, _ in event.prev_events], event_type=event.type, state_key=event.state_key, ) else: ret = yield self.resolve_state_groups( - [e for e, _ in event.prev_events], + event.room_id, [e for e, _ in event.prev_events], ) group, curr_state, prev_state = ret @@ -180,7 +180,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, event_ids, event_type=None, state_key=""): + def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -205,7 +205,7 @@ class StateHandler(object): ) state_groups = yield self.store.get_state_groups( - event_ids + room_id, event_ids ) logger.debug( diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f1265541ba..7b45db4e68 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import StoreError from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_context_over_fn, LoggingContext from synapse.util.lrucache import LruCache +from synapse.util.dictionary_cache import DictionaryCache import synapse.metrics from util.id_generators import IdGenerator, StreamIdGenerator @@ -58,7 +59,7 @@ cache_counter = metrics.register_cache( class Cache(object): - def __init__(self, name, max_entries=1000, keylen=1, lru=False): + def __init__(self, name, max_entries=1000, keylen=1, lru=True): if lru: self.cache = LruCache(max_size=max_entries) self.max_entries = None @@ -72,6 +73,11 @@ class Cache(object): self.thread = None caches_by_name[name] = self.cache + class Sentinel(object): + __slots__ = [] + + self.sentinel = Sentinel() + def check_thread(self): expected_thread = self.thread if expected_thread is None: @@ -83,22 +89,33 @@ class Cache(object): ) def get(self, *keyargs): - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + try: + if len(keyargs) != self.keylen: + raise ValueError("Expected a key to have %d items", self.keylen) - if keyargs in self.cache: - cache_counter.inc_hits(self.name) - return self.cache[keyargs] + val = self.cache.get(keyargs, self.sentinel) + if val is not self.sentinel: + cache_counter.inc_hits(self.name) + return val - cache_counter.inc_misses(self.name) - raise KeyError() + cache_counter.inc_misses(self.name) + raise KeyError() + except KeyError: + raise + except: + logger.exception("Cache.get failed for %s" % (self.name,)) + raise def update(self, sequence, *args): - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - self.prefill(*args) + try: + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + self.prefill(*args) + except: + logger.exception("Cache.update failed for %s" % (self.name,)) + raise def prefill(self, *args): # because I can't *keyargs, value keyargs = args[:-1] @@ -142,7 +159,7 @@ class CacheDescriptor(object): which can be used to insert values into the cache specifically, without calling the calculation function. """ - def __init__(self, orig, max_entries=1000, num_args=1, lru=False, + def __init__(self, orig, max_entries=1000, num_args=1, lru=True, inlineCallbacks=False): self.orig = orig @@ -210,7 +227,7 @@ class CacheDescriptor(object): return wrapped -def cached(max_entries=1000, num_args=1, lru=False): +def cached(max_entries=1000, num_args=1, lru=True): return lambda orig: CacheDescriptor( orig, max_entries=max_entries, @@ -349,6 +366,8 @@ class SQLBaseStore(object): self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True, max_entries=hs.config.event_cache_size) + self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000) + self._event_fetch_lock = threading.Condition() self._event_fetch_list = [] self._event_fetch_ongoing = 0 diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4db07f6fb4..55dd3f6cfb 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -78,7 +78,7 @@ class RoomMemberStore(SQLBaseStore): lambda events: events[0] if events else None ) - @cached() + @cached(max_entries=5000) def get_users_in_room(self, room_id): def f(txn): @@ -154,7 +154,7 @@ class RoomMemberStore(SQLBaseStore): RoomsForUser(**r) for r in self.cursor_to_dict(txn) ] - @cached() + @cached(max_entries=5000) def get_joined_hosts_for_room(self, room_id): return self.runInteraction( "get_joined_hosts_for_room", diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55c6d52890..48a4023558 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -17,6 +17,7 @@ from ._base import SQLBaseStore, cached, cachedInlineCallbacks from twisted.internet import defer +from synapse.util import unwrapFirstError from synapse.util.stringutils import random_string import logging @@ -44,52 +45,38 @@ class StateStore(SQLBaseStore): """ @defer.inlineCallbacks - def get_state_groups(self, event_ids): + def get_state_groups(self, room_id, event_ids): """ Get the state groups for the given list of event_ids The return value is a dict mapping group names to lists of events. """ - def f(txn): - groups = set() - for event_id in event_ids: - group = self._simple_select_one_onecol_txn( - txn, - table="event_to_state_groups", - keyvalues={"event_id": event_id}, - retcol="state_group", - allow_none=True, - ) - if group: - groups.add(group) - - res = {} - for group in groups: - state_ids = self._simple_select_onecol_txn( - txn, - table="state_groups_state", - keyvalues={"state_group": group}, - retcol="event_id", - ) - - res[group] = state_ids - - return res + event_and_groups = yield defer.gatherResults( + [ + self._get_state_group_for_event( + room_id, event_id, + ).addCallback(lambda group, event_id: (event_id, group), event_id) + for event_id in event_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) - states = yield self.runInteraction( - "get_state_groups", - f, - ) + groups = set(group for _, group in event_and_groups if group) - state_list = yield defer.gatherResults( + group_to_state = yield defer.gatherResults( [ - self._fetch_events_for_group(group, vals) - for group, vals in states.items() + self._get_state_for_group( + group, + ).addCallback(lambda state_dict, group: (group, state_dict), group) + for group in groups ], consumeErrors=True, - ) + ).addErrback(unwrapFirstError) - defer.returnValue(dict(state_list)) + defer.returnValue({ + group: state_map.values() + for group, state_map in group_to_state + }) @cached(num_args=1) def _fetch_events_for_group(self, key, events): @@ -205,65 +192,195 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @defer.inlineCallbacks - def get_state_for_events(self, room_id, event_ids): + @cached(num_args=2, lru=True, max_entries=10000) + def _get_state_groups_from_group(self, group, types): def f(txn): - groups = set() - event_to_group = {} - for event_id in event_ids: - # TODO: Remove this loop. - group = self._simple_select_one_onecol_txn( - txn, - table="event_to_state_groups", - keyvalues={"event_id": event_id}, - retcol="state_group", - allow_none=True, - ) - if group: - event_to_group[event_id] = group - groups.add(group) - - group_to_state_ids = {} - for group in groups: - state_ids = self._simple_select_onecol_txn( - txn, - table="state_groups_state", - keyvalues={"state_group": group}, - retcol="event_id", + if types is not None: + where_clause = "AND (%s)" % ( + " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) + else: + where_clause = "" + + sql = ( + "SELECT event_id FROM state_groups_state WHERE" + " state_group = ? %s" + ) % (where_clause,) + + args = [group] + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + + return group, [ + r[0] + for r in txn.fetchall() + ] + + return self.runInteraction( + "_get_state_groups_from_group", + f, + ) + + @cached(num_args=3, lru=True, max_entries=20000) + def _get_state_for_event_id(self, room_id, event_id, types): + def f(txn): + type_and_state_sql = " OR ".join([ + "(type = ? AND state_key = ?)" + if typ[1] is not None + else "type = ?" + for typ in types + ]) - group_to_state_ids[group] = state_ids + sql = ( + "SELECT sg.event_id FROM state_groups_state as sg" + " INNER JOIN event_to_state_groups as e" + " ON e.state_group = sg.state_group" + " WHERE e.event_id = ? AND (%s)" + ) % (type_and_state_sql,) + + args = [event_id] + for typ, state_key in types: + args.extend( + [typ, state_key] if state_key is not None else [typ] + ) + txn.execute(sql, args) - return event_to_group, group_to_state_ids + return event_id, [ + r[0] + for r in txn.fetchall() + ] - res = yield self.runInteraction( - "annotate_events_with_state_groups", + return self.runInteraction( + "_get_state_for_event_id", f, ) - event_to_group, group_to_state_ids = res + @defer.inlineCallbacks + def get_state_for_events(self, room_id, event_ids, types): + """Given a list of event_ids and type tuples, return a list of state + dicts for each event. The state dicts will only have the type/state_keys + that are in the `types` list. + + Args: + room_id (str) + event_ids (list) + types (list): List of (type, state_key) tuples which are used to + filter the state fetched. `state_key` may be None, which matches + any `state_key` + + Returns: + deferred: A list of dicts corresponding to the event_ids given. + The dicts are mappings from (type, state_key) -> state_events + """ + event_and_groups = yield defer.gatherResults( + [ + self._get_state_group_for_event( + room_id, event_id, + ).addCallback(lambda group, event_id: (event_id, group), event_id) + for event_id in event_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + groups = set(group for _, group in event_and_groups) - state_list = yield defer.gatherResults( + res = yield defer.gatherResults( [ - self._fetch_events_for_group(group, vals) - for group, vals in group_to_state_ids.items() + self._get_state_for_group( + group, types + ).addCallback(lambda state_dict, group: (group, state_dict), group) + for group in groups ], consumeErrors=True, - ) + ).addErrback(unwrapFirstError) - state_dict = { - group: { - (ev.type, ev.state_key): ev - for ev in state - } - for group, state in state_list + group_to_state = dict(res) + + event_to_state = { + event_id: group_to_state[group] + for event_id, group in event_and_groups } defer.returnValue([ - state_dict.get(event_to_group.get(event, None), None) + event_to_state[event] for event in event_ids ]) + @cached(num_args=2, lru=True, max_entries=100000) + def _get_state_group_for_event(self, room_id, event_id): + return self._simple_select_one_onecol( + table="event_to_state_groups", + keyvalues={ + "event_id": event_id, + }, + retcol="state_group", + allow_none=True, + desc="_get_state_group_for_event", + ) + + @defer.inlineCallbacks + def _get_state_for_group(self, group, types=None): + is_all, state_dict = self._state_group_cache.get(group) + + type_to_key = {} + missing_types = set() + if types is not None: + for typ, state_key in types: + if state_key is None: + type_to_key[typ] = None + missing_types.add((typ, state_key)) + else: + if type_to_key.get(typ, object()) is not None: + type_to_key.setdefault(typ, set()).add(state_key) + + if (typ, state_key) not in state_dict: + missing_types.add((typ, state_key)) + + if is_all and types is None: + defer.returnValue(state_dict) + + if is_all or (types is not None and not missing_types): + def include(typ, state_key): + sentinel = object() + valid_state_keys = type_to_key.get(typ, sentinel) + if valid_state_keys is sentinel: + return False + if valid_state_keys is None: + return True + if state_key in valid_state_keys: + return True + return False + + defer.returnValue({ + k: v + for k, v in state_dict.items() + if include(k[0], k[1]) + }) + + # Okay, so we have some missing_types, lets fetch them. + cache_seq_num = self._state_group_cache.sequence + _, state_ids = yield self._get_state_groups_from_group( + group, + frozenset(types) if types else None + ) + state_events = yield self._get_events(state_ids, get_prev_content=False) + state_dict = { + (e.type, e.state_key): e + for e in state_events + } + + # Update the cache + self._state_group_cache.update( + cache_seq_num, + key=group, + value=state_dict, + full=(types is None), + ) + + defer.returnValue(state_dict) + def _make_group_id(clock): return str(int(clock.time_msec())) + random_string(5) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index af45fc5619..9db259d5fc 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore): defer.returnValue((events, token)) @defer.inlineCallbacks - def get_recent_events_for_room(self, room_id, limit, end_token, - with_feedback=False, from_token=None): + def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): # TODO (erikj): Handle compressed feedback end_token = RoomStreamToken.parse_stream_token(end_token) diff --git a/synapse/util/dictionary_cache.py b/synapse/util/dictionary_cache.py new file mode 100644 index 0000000000..38b131677c --- /dev/null +++ b/synapse/util/dictionary_cache.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.lrucache import LruCache +from collections import namedtuple +import threading +import logging + + +logger = logging.getLogger(__name__) + + +DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value")) + + +class DictionaryCache(object): + + def __init__(self, name, max_entries=1000): + self.cache = LruCache(max_size=max_entries) + + self.name = name + self.sequence = 0 + self.thread = None + # caches_by_name[name] = self.cache + + class Sentinel(object): + __slots__ = [] + + self.sentinel = Sentinel() + + def check_thread(self): + expected_thread = self.thread + if expected_thread is None: + self.thread = threading.current_thread() + else: + if expected_thread is not threading.current_thread(): + raise ValueError( + "Cache objects can only be accessed from the main thread" + ) + + def get(self, key, dict_keys=None): + try: + entry = self.cache.get(key, self.sentinel) + if entry is not self.sentinel: + # cache_counter.inc_hits(self.name) + + if dict_keys is None: + return DictionaryEntry(entry.full, dict(entry.value)) + else: + return DictionaryEntry(entry.full, { + k: entry.value[k] + for k in dict_keys + if k in entry.value + }) + + # cache_counter.inc_misses(self.name) + return DictionaryEntry(False, {}) + except: + logger.exception("get failed") + raise + + def invalidate(self, key): + self.check_thread() + + # Increment the sequence number so that any SELECT statements that + # raced with the INSERT don't update the cache (SYN-369) + self.sequence += 1 + self.cache.pop(key, None) + + def invalidate_all(self): + self.check_thread() + self.sequence += 1 + self.cache.clear() + + def update(self, sequence, key, value, full=False): + try: + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + if full: + self._insert(key, value) + else: + self._update_or_insert(key, value) + except: + logger.exception("update failed") + raise + + def _update_or_insert(self, key, value): + entry = self.cache.setdefault(key, DictionaryEntry(False, {})) + entry.value.update(value) + + def _insert(self, key, value): + self.cache[key] = DictionaryEntry(True, value) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py new file mode 100644 index 0000000000..66fd25964d --- /dev/null +++ b/tests/rest/client/v2_alpha/test_register.py @@ -0,0 +1,134 @@ +from synapse.rest.client.v2_alpha.register import RegisterRestServlet +from synapse.api.errors import SynapseError +from twisted.internet import defer +from mock import Mock, MagicMock +from tests import unittest +import json + + +class RegisterRestServletTestCase(unittest.TestCase): + + def setUp(self): + # do the dance to hook up request data to self.request_data + self.request_data = "" + self.request = Mock( + content=Mock(read=Mock(side_effect=lambda: self.request_data)), + ) + self.request.args = {} + + self.appservice = None + self.auth = Mock(get_appservice_by_req=Mock( + side_effect=lambda x: defer.succeed(self.appservice)) + ) + + self.auth_result = (False, None, None) + self.auth_handler = Mock( + check_auth=Mock(side_effect=lambda x,y,z: self.auth_result) + ) + self.registration_handler = Mock() + self.identity_handler = Mock() + self.login_handler = Mock() + + # do the dance to hook it up to the hs global + self.handlers = Mock( + auth_handler=self.auth_handler, + registration_handler=self.registration_handler, + identity_handler=self.identity_handler, + login_handler=self.login_handler + ) + self.hs = Mock() + self.hs.hostname = "superbig~testing~thing.com" + self.hs.get_auth = Mock(return_value=self.auth) + self.hs.get_handlers = Mock(return_value=self.handlers) + self.hs.config.disable_registration = False + + # init the thing we're testing + self.servlet = RegisterRestServlet(self.hs) + + @defer.inlineCallbacks + def test_POST_appservice_registration_valid(self): + user_id = "@kermit:muppet" + token = "kermits_access_token" + self.request.args = { + "access_token": "i_am_an_app_service" + } + self.request_data = json.dumps({ + "username": "kermit" + }) + self.appservice = { + "id": "1234" + } + self.registration_handler.appservice_register = Mock( + return_value=(user_id, token) + ) + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (200, { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname + })) + + @defer.inlineCallbacks + def test_POST_appservice_registration_invalid(self): + self.request.args = { + "access_token": "i_am_an_app_service" + } + self.request_data = json.dumps({ + "username": "kermit" + }) + self.appservice = None # no application service exists + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (401, None)) + + def test_POST_bad_password(self): + self.request_data = json.dumps({ + "username": "kermit", + "password": 666 + }) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) + + def test_POST_bad_username(self): + self.request_data = json.dumps({ + "username": 777, + "password": "monkey" + }) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) + + @defer.inlineCallbacks + def test_POST_user_valid(self): + user_id = "@kermit:muppet" + token = "kermits_access_token" + self.request_data = json.dumps({ + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.check_username = Mock(return_value=True) + self.auth_result = (True, None, { + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.register = Mock(return_value=(user_id, token)) + + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (200, { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname + })) + + def test_POST_disabled_registration(self): + self.hs.config.disable_registration = True + self.request_data = json.dumps({ + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.check_username = Mock(return_value=True) + self.auth_result = (True, None, { + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.register = Mock(return_value=("@user:id", "t")) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) \ No newline at end of file diff --git a/tests/test_state.py b/tests/test_state.py index fea25f7021..5845358754 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -69,7 +69,7 @@ class StateGroupStore(object): self._next_group = 1 - def get_state_groups(self, event_ids): + def get_state_groups(self, room_id, event_ids): groups = {} for event_id in event_ids: group = self._event_to_state_group.get(event_id) diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py new file mode 100644 index 0000000000..79bc1225d6 --- /dev/null +++ b/tests/util/test_dict_cache.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer +from tests import unittest + +from synapse.util.dictionary_cache import DictionaryCache + + +class DictCacheTestCase(unittest.TestCase): + + def setUp(self): + self.cache = DictionaryCache("foobar") + + def test_simple_cache_hit_full(self): + key = "test_simple_cache_hit_full" + + v = self.cache.get(key) + self.assertEqual((False, {}), v) + + seq = self.cache.sequence + test_value = {"test": "test_simple_cache_hit_full"} + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key) + self.assertEqual(test_value, c.value) + + def test_simple_cache_hit_partial(self): + key = "test_simple_cache_hit_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_hit_partial" + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test"]) + self.assertEqual(test_value, c.value) + + def test_simple_cache_miss_partial(self): + key = "test_simple_cache_miss_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_miss_partial" + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test2"]) + self.assertEqual({}, c.value) + + def test_simple_cache_hit_miss_partial(self): + key = "test_simple_cache_hit_miss_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_hit_miss_partial", + "test2": "test_simple_cache_hit_miss_partial2", + "test3": "test_simple_cache_hit_miss_partial3", + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test2"]) + self.assertEqual({"test2": "test_simple_cache_hit_miss_partial2"}, c.value) + + def test_multi_insert(self): + key = "test_simple_cache_hit_miss_partial" + + seq = self.cache.sequence + test_value_1 = { + "test": "test_simple_cache_hit_miss_partial", + } + self.cache.update(seq, key, test_value_1, full=False) + + seq = self.cache.sequence + test_value_2 = { + "test2": "test_simple_cache_hit_miss_partial2", + } + self.cache.update(seq, key, test_value_2, full=False) + + c = self.cache.get(key) + self.assertEqual( + { + "test": "test_simple_cache_hit_miss_partial", + "test2": "test_simple_cache_hit_miss_partial2", + }, + c.value + ) |