summary refs log tree commit diff
path: root/synapse/api
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/api')
-rw-r--r--synapse/api/auth.py116
-rw-r--r--synapse/api/constants.py5
-rw-r--r--synapse/api/errors.py132
-rw-r--r--synapse/api/filtering.py300
4 files changed, 403 insertions, 150 deletions
diff --git a/synapse/api/auth.py b/synapse/api/auth.py

index 03a215ab1b..f17fda6315 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py
@@ -23,7 +23,8 @@ from synapse import event_auth from synapse.api.constants import EventTypes, Membership, JoinRules from synapse.api.errors import AuthError, Codes from synapse.types import UserID -from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.caches import register_cache, CACHE_SIZE_FACTOR +from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import Measure logger = logging.getLogger(__name__) @@ -39,6 +40,10 @@ AuthEventTypes = ( GUEST_DEVICE_ID = "guest_device" +class _InvalidMacaroonException(Exception): + pass + + class Auth(object): """ FIXME: This class contains a mix of functions for authenticating users @@ -51,6 +56,9 @@ class Auth(object): self.state = hs.get_state_handler() self.TOKEN_NOT_FOUND_HTTP_STATUS = 401 + self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000) + register_cache("token_cache", self.token_cache) + @defer.inlineCallbacks def check_from_context(self, event, context, do_sig_check=True): auth_events_ids = yield self.compute_auth_events( @@ -144,17 +152,8 @@ class Auth(object): @defer.inlineCallbacks def check_host_in_room(self, room_id, host): with Measure(self.clock, "check_host_in_room"): - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - - logger.debug("calling resolve_state_groups from check_host_in_room") - entry = yield self.state.resolve_state_groups( - room_id, latest_event_ids - ) - - ret = yield self.store.is_host_joined( - room_id, host, entry.state_group, entry.state - ) - defer.returnValue(ret) + latest_event_ids = yield self.store.is_host_joined(room_id, host) + defer.returnValue(latest_event_ids) def _check_joined_room(self, member, user_id, room_id): if not member or member.membership != Membership.JOIN: @@ -205,13 +204,12 @@ class Auth(object): ip_addr = self.hs.get_ip_from_request(request) user_agent = request.requestHeaders.getRawHeaders( - "User-Agent", - default=[""] + b"User-Agent", + default=[b""] )[0] if user and access_token and ip_addr: - preserve_context_over_fn( - self.store.insert_client_ip, - user=user, + self.store.insert_client_ip( + user_id=user.to_string(), access_token=access_token, ip=ip_addr, user_agent=user_agent, @@ -272,13 +270,17 @@ class Auth(object): rights (str): The operation being performed; the access token must allow this. Returns: - dict : dict that includes the user and the ID of their access token. + Deferred[dict]: dict that includes: + `user` (UserID) + `is_guest` (bool) + `token_id` (int|None): access token id. May be None if guest + `device_id` (str|None): device corresponding to access token Raises: AuthError if no user by that token exists or the token is invalid. """ try: - macaroon = pymacaroons.Macaroon.deserialize(token) - except Exception: # deserialize can throw more-or-less anything + user_id, guest = self._parse_and_validate_macaroon(token, rights) + except _InvalidMacaroonException: # doesn't look like a macaroon: treat it as an opaque token which # must be in the database. # TODO: it would be nice to get rid of this, but apparently some @@ -287,19 +289,8 @@ class Auth(object): defer.returnValue(r) try: - user_id = self.get_user_id_from_macaroon(macaroon) user = UserID.from_string(user_id) - self.validate_macaroon( - macaroon, rights, self.hs.config.expire_access_token, - user_id=user_id, - ) - - guest = False - for caveat in macaroon.caveats: - if caveat.caveat_id == "guest = true": - guest = True - if guest: # Guest access tokens are not stored in the database (there can # only be one access token per guest, anyway). @@ -371,6 +362,55 @@ class Auth(object): errcode=Codes.UNKNOWN_TOKEN ) + def _parse_and_validate_macaroon(self, token, rights="access"): + """Takes a macaroon and tries to parse and validate it. This is cached + if and only if rights == access and there isn't an expiry. + + On invalid macaroon raises _InvalidMacaroonException + + Returns: + (user_id, is_guest) + """ + if rights == "access": + cached = self.token_cache.get(token, None) + if cached: + return cached + + try: + macaroon = pymacaroons.Macaroon.deserialize(token) + except Exception: # deserialize can throw more-or-less anything + # doesn't look like a macaroon: treat it as an opaque token which + # must be in the database. + # TODO: it would be nice to get rid of this, but apparently some + # people use access tokens which aren't macaroons + raise _InvalidMacaroonException() + + try: + user_id = self.get_user_id_from_macaroon(macaroon) + + has_expiry = False + guest = False + for caveat in macaroon.caveats: + if caveat.caveat_id.startswith("time "): + has_expiry = True + elif caveat.caveat_id == "guest = true": + guest = True + + self.validate_macaroon( + macaroon, rights, self.hs.config.expire_access_token, + user_id=user_id, + ) + except (pymacaroons.exceptions.MacaroonException, TypeError, ValueError): + raise AuthError( + self.TOKEN_NOT_FOUND_HTTP_STATUS, "Invalid macaroon passed.", + errcode=Codes.UNKNOWN_TOKEN + ) + + if not has_expiry and rights == "access": + self.token_cache[token] = (user_id, guest) + + return user_id, guest + def get_user_id_from_macaroon(self, macaroon): """Retrieve the user_id given by the caveats on the macaroon. @@ -483,6 +523,14 @@ class Auth(object): ) def is_server_admin(self, user): + """ Check if the given user is a local server admin. + + Args: + user (str): mxid of user to check + + Returns: + bool: True if the user is an admin + """ return self.store.is_server_admin(user) @defer.inlineCallbacks @@ -624,7 +672,7 @@ def has_access_token(request): bool: False if no access_token was given, True otherwise. """ query_params = request.args.get("access_token") - auth_headers = request.requestHeaders.getRawHeaders("Authorization") + auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") return bool(query_params) or bool(auth_headers) @@ -644,8 +692,8 @@ def get_access_token_from_request(request, token_not_found_http_status=401): AuthError: If there isn't an access_token in the request. """ - auth_headers = request.requestHeaders.getRawHeaders("Authorization") - query_params = request.args.get("access_token") + auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") + query_params = request.args.get(b"access_token") if auth_headers: # Try the get the access_token from a "Authorization: Bearer" # header diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index ca23c9c460..5baba43966 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py
@@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2017 Vector Creations Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +16,9 @@ """Contains constants from the specification.""" +# the "depth" field on events is limited to 2**63 - 1 +MAX_DEPTH = 2**63 - 1 + class Membership(object): @@ -44,6 +48,7 @@ class JoinRules(object): class LoginType(object): PASSWORD = u"m.login.password" EMAIL_IDENTITY = u"m.login.email.identity" + MSISDN = u"m.login.msisdn" RECAPTCHA = u"m.login.recaptcha" DUMMY = u"m.login.dummy" diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 921c457738..a9ff5576f3 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py
@@ -17,6 +17,9 @@ import logging +import simplejson as json +from six import iteritems + logger = logging.getLogger(__name__) @@ -45,32 +48,52 @@ class Codes(object): THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED" THREEPID_IN_USE = "M_THREEPID_IN_USE" THREEPID_NOT_FOUND = "M_THREEPID_NOT_FOUND" + THREEPID_DENIED = "M_THREEPID_DENIED" INVALID_USERNAME = "M_INVALID_USERNAME" SERVER_NOT_TRUSTED = "M_SERVER_NOT_TRUSTED" class CodeMessageException(RuntimeError): - """An exception with integer code and message string attributes.""" + """An exception with integer code and message string attributes. + Attributes: + code (int): HTTP error code + msg (str): string describing the error + """ def __init__(self, code, msg): super(CodeMessageException, self).__init__("%d: %s" % (code, msg)) self.code = code self.msg = msg - self.response_code_message = None def error_dict(self): return cs_error(self.msg) +class MatrixCodeMessageException(CodeMessageException): + """An error from a general matrix endpoint, eg. from a proxied Matrix API call. + + Attributes: + errcode (str): Matrix error code e.g 'M_FORBIDDEN' + """ + def __init__(self, code, msg, errcode=Codes.UNKNOWN): + super(MatrixCodeMessageException, self).__init__(code, msg) + self.errcode = errcode + + class SynapseError(CodeMessageException): - """A base error which can be caught for all synapse events.""" + """A base exception type for matrix errors which have an errcode and error + message (as well as an HTTP status code). + + Attributes: + errcode (str): Matrix error code e.g 'M_FORBIDDEN' + """ def __init__(self, code, msg, errcode=Codes.UNKNOWN): """Constructs a synapse error. Args: code (int): The integer error code (an HTTP response code) msg (str): The human-readable error message. - err (str): The error code e.g 'M_FORBIDDEN' + errcode (str): The matrix error code e.g 'M_FORBIDDEN' """ super(SynapseError, self).__init__(code, msg) self.errcode = errcode @@ -81,12 +104,87 @@ class SynapseError(CodeMessageException): self.errcode, ) + @classmethod + def from_http_response_exception(cls, err): + """Make a SynapseError based on an HTTPResponseException + + This is useful when a proxied request has failed, and we need to + decide how to map the failure onto a matrix error to send back to the + client. + + An attempt is made to parse the body of the http response as a matrix + error. If that succeeds, the errcode and error message from the body + are used as the errcode and error message in the new synapse error. + + Otherwise, the errcode is set to M_UNKNOWN, and the error message is + set to the reason code from the HTTP response. + + Args: + err (HttpResponseException): + + Returns: + SynapseError: + """ + # try to parse the body as json, to get better errcode/msg, but + # default to M_UNKNOWN with the HTTP status as the error text + try: + j = json.loads(err.response) + except ValueError: + j = {} + errcode = j.get('errcode', Codes.UNKNOWN) + errmsg = j.get('error', err.msg) + + res = SynapseError(err.code, errmsg, errcode) + return res + class RegistrationError(SynapseError): """An error raised when a registration event fails.""" pass +class FederationDeniedError(SynapseError): + """An error raised when the server tries to federate with a server which + is not on its federation whitelist. + + Attributes: + destination (str): The destination which has been denied + """ + + def __init__(self, destination): + """Raised by federation client or server to indicate that we are + are deliberately not attempting to contact a given server because it is + not on our federation whitelist. + + Args: + destination (str): the domain in question + """ + + self.destination = destination + + super(FederationDeniedError, self).__init__( + code=403, + msg="Federation denied with %s." % (self.destination,), + errcode=Codes.FORBIDDEN, + ) + + +class InteractiveAuthIncompleteError(Exception): + """An error raised when UI auth is not yet complete + + (This indicates we should return a 401 with 'result' as the body) + + Attributes: + result (dict): the server response to the request, which should be + passed back to the client + """ + def __init__(self, result): + super(InteractiveAuthIncompleteError, self).__init__( + "Interactive auth not yet complete", + ) + self.result = result + + class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__(self, *args, **kwargs): @@ -106,13 +204,11 @@ class UnrecognizedRequestError(SynapseError): class NotFoundError(SynapseError): """An error indicating we can't find the thing you asked for""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.NOT_FOUND + def __init__(self, msg="Not found", errcode=Codes.NOT_FOUND): super(NotFoundError, self).__init__( 404, - "Not found", - **kwargs + msg, + errcode=errcode ) @@ -173,7 +269,6 @@ class LimitExceededError(SynapseError): errcode=Codes.LIMIT_EXCEEDED): super(LimitExceededError, self).__init__(code, msg, errcode) self.retry_after_ms = retry_after_ms - self.response_code_message = "Too Many Requests" def error_dict(self): return cs_error( @@ -203,7 +298,7 @@ def cs_error(msg, code=Codes.UNKNOWN, **kwargs): A dict representing the error response JSON. """ err = {"error": msg, "errcode": code} - for key, value in kwargs.iteritems(): + for key, value in iteritems(kwargs): err[key] = value return err @@ -243,6 +338,19 @@ class FederationError(RuntimeError): class HttpResponseException(CodeMessageException): + """ + Represents an HTTP-level failure of an outbound request + + Attributes: + response (str): body of response + """ def __init__(self, code, msg, response): - self.response = response + """ + + Args: + code (int): HTTP status code + msg (str): reason phrase from HTTP response status line + response (str): body of response + """ super(HttpResponseException, self).__init__(code, msg) + self.response = response diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index fb291d7fb9..db43219d24 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py
@@ -13,11 +13,174 @@ # See the License for the specific language governing permissions and # limitations under the License. from synapse.api.errors import SynapseError +from synapse.storage.presence import UserPresenceState from synapse.types import UserID, RoomID - from twisted.internet import defer -import ujson as json +import simplejson as json +import jsonschema +from jsonschema import FormatChecker + +FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "limit": { + "type": "number" + }, + "senders": { + "$ref": "#/definitions/user_id_array" + }, + "not_senders": { + "$ref": "#/definitions/user_id_array" + }, + # TODO: We don't limit event type values but we probably should... + # check types are valid event types + "types": { + "type": "array", + "items": { + "type": "string" + } + }, + "not_types": { + "type": "array", + "items": { + "type": "string" + } + } + } +} + +ROOM_FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "not_rooms": { + "$ref": "#/definitions/room_id_array" + }, + "rooms": { + "$ref": "#/definitions/room_id_array" + }, + "ephemeral": { + "$ref": "#/definitions/room_event_filter" + }, + "include_leave": { + "type": "boolean" + }, + "state": { + "$ref": "#/definitions/room_event_filter" + }, + "timeline": { + "$ref": "#/definitions/room_event_filter" + }, + "account_data": { + "$ref": "#/definitions/room_event_filter" + }, + } +} + +ROOM_EVENT_FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "limit": { + "type": "number" + }, + "senders": { + "$ref": "#/definitions/user_id_array" + }, + "not_senders": { + "$ref": "#/definitions/user_id_array" + }, + "types": { + "type": "array", + "items": { + "type": "string" + } + }, + "not_types": { + "type": "array", + "items": { + "type": "string" + } + }, + "rooms": { + "$ref": "#/definitions/room_id_array" + }, + "not_rooms": { + "$ref": "#/definitions/room_id_array" + }, + "contains_url": { + "type": "boolean" + } + } +} + +USER_ID_ARRAY_SCHEMA = { + "type": "array", + "items": { + "type": "string", + "format": "matrix_user_id" + } +} + +ROOM_ID_ARRAY_SCHEMA = { + "type": "array", + "items": { + "type": "string", + "format": "matrix_room_id" + } +} + +USER_FILTER_SCHEMA = { + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "schema for a Sync filter", + "type": "object", + "definitions": { + "room_id_array": ROOM_ID_ARRAY_SCHEMA, + "user_id_array": USER_ID_ARRAY_SCHEMA, + "filter": FILTER_SCHEMA, + "room_filter": ROOM_FILTER_SCHEMA, + "room_event_filter": ROOM_EVENT_FILTER_SCHEMA + }, + "properties": { + "presence": { + "$ref": "#/definitions/filter" + }, + "account_data": { + "$ref": "#/definitions/filter" + }, + "room": { + "$ref": "#/definitions/room_filter" + }, + "event_format": { + "type": "string", + "enum": ["client", "federation"] + }, + "event_fields": { + "type": "array", + "items": { + "type": "string", + # Don't allow '\\' in event field filters. This makes matching + # events a lot easier as we can then use a negative lookbehind + # assertion to split '\.' If we allowed \\ then it would + # incorrectly split '\\.' See synapse.events.utils.serialize_event + "pattern": "^((?!\\\).)*$" + } + } + }, + "additionalProperties": False +} + + +@FormatChecker.cls_checks('matrix_room_id') +def matrix_room_id_validator(room_id_str): + return RoomID.from_string(room_id_str) + + +@FormatChecker.cls_checks('matrix_user_id') +def matrix_user_id_validator(user_id_str): + return UserID.from_string(user_id_str) class Filtering(object): @@ -52,98 +215,11 @@ class Filtering(object): # NB: Filters are the complete json blobs. "Definitions" are an # individual top-level key e.g. public_user_data. Filters are made of # many definitions. - - top_level_definitions = [ - "presence", "account_data" - ] - - room_level_definitions = [ - "state", "timeline", "ephemeral", "account_data" - ] - - for key in top_level_definitions: - if key in user_filter_json: - self._check_definition(user_filter_json[key]) - - if "room" in user_filter_json: - self._check_definition_room_lists(user_filter_json["room"]) - for key in room_level_definitions: - if key in user_filter_json["room"]: - self._check_definition(user_filter_json["room"][key]) - - if "event_fields" in user_filter_json: - if type(user_filter_json["event_fields"]) != list: - raise SynapseError(400, "event_fields must be a list of strings") - for field in user_filter_json["event_fields"]: - if not isinstance(field, basestring): - raise SynapseError(400, "Event field must be a string") - # Don't allow '\\' in event field filters. This makes matching - # events a lot easier as we can then use a negative lookbehind - # assertion to split '\.' If we allowed \\ then it would - # incorrectly split '\\.' See synapse.events.utils.serialize_event - if r'\\' in field: - raise SynapseError( - 400, r'The escape character \ cannot itself be escaped' - ) - - def _check_definition_room_lists(self, definition): - """Check that "rooms" and "not_rooms" are lists of room ids if they - are present - - Args: - definition(dict): The filter definition - Raises: - SynapseError: If there was a problem with this definition. - """ - # check rooms are valid room IDs - room_id_keys = ["rooms", "not_rooms"] - for key in room_id_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for room_id in definition[key]: - RoomID.from_string(room_id) - - def _check_definition(self, definition): - """Check if the provided definition is valid. - - This inspects not only the types but also the values to make sure they - make sense. - - Args: - definition(dict): The filter definition - Raises: - SynapseError: If there was a problem with this definition. - """ - # NB: Filters are the complete json blobs. "Definitions" are an - # individual top-level key e.g. public_user_data. Filters are made of - # many definitions. - if type(definition) != dict: - raise SynapseError( - 400, "Expected JSON object, not %s" % (definition,) - ) - - self._check_definition_room_lists(definition) - - # check senders are valid user IDs - user_id_keys = ["senders", "not_senders"] - for key in user_id_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for user_id in definition[key]: - UserID.from_string(user_id) - - # TODO: We don't limit event type values but we probably should... - # check types are valid event types - event_keys = ["types", "not_types"] - for key in event_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for event_type in definition[key]: - if not isinstance(event_type, basestring): - raise SynapseError(400, "Event type should be a string") + try: + jsonschema.validate(user_filter_json, USER_FILTER_SCHEMA, + format_checker=FormatChecker()) + except jsonschema.ValidationError as e: + raise SynapseError(400, e.message) class FilterCollection(object): @@ -253,19 +329,35 @@ class Filter(object): Returns: bool: True if the event matches """ - sender = event.get("sender", None) - if not sender: - # Presence events have their 'sender' in content.user_id - content = event.get("content") - # account_data has been allowed to have non-dict content, so check type first - if isinstance(content, dict): - sender = content.get("user_id") + # We usually get the full "events" as dictionaries coming through, + # except for presence which actually gets passed around as its own + # namedtuple type. + if isinstance(event, UserPresenceState): + sender = event.user_id + room_id = None + ev_type = "m.presence" + is_url = False + else: + sender = event.get("sender", None) + if not sender: + # Presence events had their 'sender' in content.user_id, but are + # now handled above. We don't know if anything else uses this + # form. TODO: Check this and probably remove it. + content = event.get("content") + # account_data has been allowed to have non-dict content, so + # check type first + if isinstance(content, dict): + sender = content.get("user_id") + + room_id = event.get("room_id", None) + ev_type = event.get("type", None) + is_url = "url" in event.get("content", {}) return self.check_fields( - event.get("room_id", None), + room_id, sender, - event.get("type", None), - "url" in event.get("content", {}) + ev_type, + is_url, ) def check_fields(self, room_id, sender, event_type, contains_url):