From d81602b75afc2c39314da1f9a21be436134e5361 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 Jul 2018 13:52:49 +0100 Subject: Add helper base class for generating new replication endpoints This will hopefully reduce the boiler plate required to implement new internal HTTP requests. --- synapse/replication/http/_base.py | 208 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 synapse/replication/http/_base.py (limited to 'synapse/replication/http/_base.py') diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py new file mode 100644 index 0000000000..24f00d95fc --- /dev/null +++ b/synapse/replication/http/_base.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import logging +import re + +from six.moves import urllib + +from twisted.internet import defer + +from synapse.api.errors import ( + CodeMessageException, + MatrixCodeMessageException, + SynapseError, +) +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.stringutils import random_string + +logger = logging.getLogger(__name__) + + +class ReplicationEndpoint(object): + """Helper base class for defining new replication HTTP endpoints. + + This creates an endpoint under `/_synapse/replication/:NAME/:PATH_ARGS..` + (with an `/:txn_id` prefix for cached requests.), where NAME is a name, + PATH_ARGS are a tuple of parameters to be encoded in the URL. + + For example, if `NAME` is "send_event" and `PATH_ARGS` is `("event_id",)`, + with `CACHE` set to true then this generates an endpoint: + + /_synapse/replication/send_event/:event_id/:txn_id + + For POST requests the payload is serialized to json and sent as the body, + while for GET requests the payload is added as query parameters. See + `_serialize_payload` for details. + + Incoming requests are handled by overriding `_handle_request`. Servers + must call `register` to register the path with the HTTP server. + + Requests can be sent by calling the client returned by `make_client`. + + Attributes: + NAME (str): A name for the endpoint, added to the path as well as used + in logging and metrics. + PATH_ARGS (tuple[str]): A list of parameters to be added to the path. + Adding parameters to the path (rather than payload) can make it + easier to follow along in the log files. + POST (bool): True to use POST request with JSON body, or false to use + GET requests with query params. + CACHE (bool): Whether server should cache the result of the request/ + If true then transparently adds a txn_id to all requests, and + `_handle_request` must return a Deferred. + RETRY_ON_TIMEOUT(bool): Whether or not to retry the request when a 504 + is received. + """ + + __metaclass__ = abc.ABCMeta + + NAME = abc.abstractproperty() + PATH_ARGS = abc.abstractproperty() + + POST = True + CACHE = True + RETRY_ON_TIMEOUT = True + + def __init__(self, hs): + if self.CACHE: + self.response_cache = ResponseCache( + hs, "repl." + self.NAME, + timeout_ms=30 * 60 * 1000, + ) + + @abc.abstractmethod + def _serialize_payload(**kwargs): + """Static method that is called when creating a request. + + Concrete implementations should have explicit parameters (rather than + kwargs) so that an appropriate exception is raised if the client is + called with unexpected parameters. All PATH_ARGS must appear in + argument list. + + Returns: + Deferred[dict]|dict: If POST request then dictionary must be JSON + serialisable, otherwise must be appropriate for adding as query + args. + """ + return {} + + @abc.abstractmethod + def _handle_request(self, request, **kwargs): + """Handle incoming request. + + This is called with the request object and PATH_ARGS. + + Returns: + Deferred[dict]: A JSON serialisable dict to be used as response + body of request. + """ + pass + + @classmethod + def make_client(cls, hs): + """Create a client that makes requests. + + Returns a callable that accepts the same parameters as `_serialize_payload`. + """ + clock = hs.get_clock() + host = hs.config.worker_replication_host + port = hs.config.worker_replication_http_port + + client = hs.get_simple_http_client() + + @defer.inlineCallbacks + def send_request(**kwargs): + data = yield cls._serialize_payload(**kwargs) + + url_args = [urllib.parse.quote(kwargs[name]) for name in cls.PATH_ARGS] + + if cls.CACHE: + txn_id = random_string(10) + url_args.append(txn_id) + + if cls.POST: + request_func = client.post_json_get_json + else: + request_func = client.get_json + + uri = "http://%s:%s/_synapse/replication/%s/%s" % ( + host, port, cls.NAME, "/".join(url_args) + ) + + try: + # We keep retrying the same request for timeouts. This is so that we + # have a good idea that the request has either succeeded or failed on + # the master, and so whether we should clean up or not. + while True: + try: + result = yield request_func(uri, data) + break + except CodeMessageException as e: + if e.code != 504 or not cls.RETRY_ON_TIMEOUT: + raise + + logger.warn("send_federation_events_to_master request timed out") + + # If we timed out we probably don't need to worry about backing + # off too much, but lets just wait a little anyway. + yield clock.sleep(1) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + + defer.returnValue(result) + + return send_request + + def register(self, http_server): + """Called by the server to register this as a handler to the + appropriate path. + """ + + url_args = list(self.PATH_ARGS) + method = "GET" + handler = self._handle_request + if self.POST: + method = "POST" + + if self.CACHE: + handler = self._cached_handler + url_args.append("txn_id") + + args = "/".join("(?P<%s>[^/]+)" % (arg,) for arg in url_args) + pattern = re.compile("^/_synapse/replication/%s/%s$" % ( + self.NAME, + args + )) + + http_server.register_paths(method, [pattern], handler) + + def _cached_handler(self, request, txn_id, **kwargs): + """Wraps `_handle_request` the responses should be cached. + """ + # We just use the txn_id here, but we probably also want to use the + # other PATH_ARGS as well. + + assert self.CACHE + + return self.response_cache.wrap( + txn_id, + self._handle_request, + request, **kwargs + ) -- cgit 1.4.1 From 051a99c4006a7deb1f9256df0a25c702dcdb451d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Aug 2018 14:29:31 +0100 Subject: Fix isort --- synapse/replication/http/_base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'synapse/replication/http/_base.py') diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index a7d1b2dabe..4de3825fda 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -21,10 +21,7 @@ from six.moves import urllib from twisted.internet import defer -from synapse.api.errors import ( - CodeMessageException, - HttpResponseException, -) +from synapse.api.errors import CodeMessageException, HttpResponseException from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string -- cgit 1.4.1 From 501141763245647f41636621867ad1bacc47e6b5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Aug 2018 10:29:58 +0100 Subject: Fixup logging and docstrings --- synapse/replication/http/_base.py | 6 ++++-- synapse/replication/http/membership.py | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) (limited to 'synapse/replication/http/_base.py') diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 4de3825fda..619ddab540 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -151,7 +151,7 @@ class ReplicationEndpoint(object): if e.code != 504 or not cls.RETRY_ON_TIMEOUT: raise - logger.warn("send_federation_events_to_master request timed out") + logger.warn("%s request timed out", cls.NAME) # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. @@ -190,7 +190,9 @@ class ReplicationEndpoint(object): http_server.register_paths(method, [pattern], handler) def _cached_handler(self, request, txn_id, **kwargs): - """Wraps `_handle_request` the responses should be cached. + """Called on new incoming requests when caching is enabled. Checks + if their is a cached response for the request and returns that, + otherwise calls `_handle_request` and caches its response. """ # We just use the txn_id here, but we probably also want to use the # other PATH_ARGS as well. diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index 8ad83e8421..e58bebf12a 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -27,6 +27,16 @@ logger = logging.getLogger(__name__) class ReplicationRemoteJoinRestServlet(ReplicationEndpoint): """Does a remote join for the given user to the given room + + Request format: + + POST /_synapse/replication/remote_join/:room_id/:user_id + + { + "requester": ..., + "remote_room_hosts": [...], + "content": { ... } + } """ NAME = "remote_join" @@ -85,6 +95,15 @@ class ReplicationRemoteJoinRestServlet(ReplicationEndpoint): class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint): """Rejects the invite for the user and room. + + Request format: + + POST /_synapse/replication/remote_reject_invite/:room_id/:user_id + + { + "requester": ..., + "remote_room_hosts": [...], + } """ NAME = "remote_reject_invite" @@ -153,6 +172,17 @@ class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint): class ReplicationRegister3PIDGuestRestServlet(ReplicationEndpoint): """Gets/creates a guest account for given 3PID. + + Request format: + + POST /_synapse/replication/get_or_register_3pid_guest/ + + { + "requester": ..., + "medium": ..., + "address": ..., + "inviter_user_id": ... + } """ NAME = "get_or_register_3pid_guest" @@ -206,6 +236,12 @@ class ReplicationRegister3PIDGuestRestServlet(ReplicationEndpoint): class ReplicationUserJoinedLeftRoomRestServlet(ReplicationEndpoint): """Notifies that a user has joined or left the room + + Request format: + + POST /_synapse/replication/membership_change/:room_id/:user_id/:change + + {} """ NAME = "membership_change" -- cgit 1.4.1 From bebe325e6cdd83f7bacd8ad71f6cdd23273c88db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Aug 2018 10:35:47 +0100 Subject: Rename POST param to METHOD --- synapse/replication/http/_base.py | 34 ++++++++++++++++++++++------------ synapse/replication/http/send_event.py | 1 - 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'synapse/replication/http/_base.py') diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 619ddab540..53a0fd459a 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -40,8 +40,8 @@ class ReplicationEndpoint(object): /_synapse/replication/send_event/:event_id/:txn_id - For POST requests the payload is serialized to json and sent as the body, - while for GET requests the payload is added as query parameters. See + For POST/PUT requests the payload is serialized to json and sent as the + body, while for GET requests the payload is added as query parameters. See `_serialize_payload` for details. Incoming requests are handled by overriding `_handle_request`. Servers @@ -55,8 +55,9 @@ class ReplicationEndpoint(object): PATH_ARGS (tuple[str]): A list of parameters to be added to the path. Adding parameters to the path (rather than payload) can make it easier to follow along in the log files. - POST (bool): True to use POST request with JSON body, or false to use - GET requests with query params. + METHOD (str): The method of the HTTP request, defaults to POST. Can be + one of POST, PUT or GET. If GET then the payload is sent as query + parameters rather than a JSON body. CACHE (bool): Whether server should cache the result of the request/ If true then transparently adds a txn_id to all requests, and `_handle_request` must return a Deferred. @@ -69,7 +70,7 @@ class ReplicationEndpoint(object): NAME = abc.abstractproperty() PATH_ARGS = abc.abstractproperty() - POST = True + METHOD = "POST" CACHE = True RETRY_ON_TIMEOUT = True @@ -80,6 +81,8 @@ class ReplicationEndpoint(object): timeout_ms=30 * 60 * 1000, ) + assert self.METHOD in ("PUT", "POST", "GET") + @abc.abstractmethod def _serialize_payload(**kwargs): """Static method that is called when creating a request. @@ -90,9 +93,9 @@ class ReplicationEndpoint(object): argument list. Returns: - Deferred[dict]|dict: If POST request then dictionary must be JSON - serialisable, otherwise must be appropriate for adding as query - args. + Deferred[dict]|dict: If POST/PUT request then dictionary must be + JSON serialisable, otherwise must be appropriate for adding as + query args. """ return {} @@ -130,10 +133,18 @@ class ReplicationEndpoint(object): txn_id = random_string(10) url_args.append(txn_id) - if cls.POST: + if cls.METHOD == "POST": request_func = client.post_json_get_json - else: + elif cls.METHOD == "PUT": + request_func = client.put_json + elif cls.METHOD == "GET": request_func = client.get_json + else: + # We have already asserted in the constructor that a + # compatible was picked, but lets be paranoid. + raise Exception( + "Unknown METHOD on %s replication endpoint" % (cls.NAME,) + ) uri = "http://%s:%s/_synapse/replication/%s/%s" % ( host, port, cls.NAME, "/".join(url_args) @@ -174,8 +185,7 @@ class ReplicationEndpoint(object): url_args = list(self.PATH_ARGS) method = "GET" handler = self._handle_request - if self.POST: - method = "POST" + method = self.METHOD if self.CACHE: handler = self._cached_handler diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 50810d94cb..5b52c91650 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -47,7 +47,6 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): """ NAME = "send_event" PATH_ARGS = ("event_id",) - POST = True def __init__(self, hs): super(ReplicationSendEventRestServlet, self).__init__(hs) -- cgit 1.4.1 From 62564797f5f9c6b1295a98a9742ae226b87a135e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Aug 2018 09:56:10 +0100 Subject: Fixup wording and remove dead code --- changelog.d/3632.misc | 1 + synapse/replication/http/_base.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/replication/http/_base.py') diff --git a/changelog.d/3632.misc b/changelog.d/3632.misc index e69de29bb2..9d64bbe83b 100644 --- a/changelog.d/3632.misc +++ b/changelog.d/3632.misc @@ -0,0 +1 @@ +Refactor HTTP replication endpoints to reduce code duplication diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 53a0fd459a..5e5376cf58 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -183,7 +183,6 @@ class ReplicationEndpoint(object): """ url_args = list(self.PATH_ARGS) - method = "GET" handler = self._handle_request method = self.METHOD @@ -201,7 +200,7 @@ class ReplicationEndpoint(object): def _cached_handler(self, request, txn_id, **kwargs): """Called on new incoming requests when caching is enabled. Checks - if their is a cached response for the request and returns that, + if there is a cached response for the request and returns that, otherwise calls `_handle_request` and caches its response. """ # We just use the txn_id here, but we probably also want to use the -- cgit 1.4.1