From c127c8d0421f0228a46ebbe280c9537e8d8ea42b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 4 Sep 2018 01:23:18 +0100 Subject: Fix origin handling for pushed transactions Use the actual origin for push transactions, rather than whatever the remote server claimed. --- synapse/federation/federation_server.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e0cd294a1..547c6aec80 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -99,7 +99,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - def on_incoming_transaction(self, transaction_data): + def on_incoming_transaction(self, origin, transaction_data): # keep this as early as possible to make the calculated origin ts as # accurate as possible. request_time = self._clock.time_msec() @@ -108,34 +108,33 @@ class FederationServer(FederationBase): if not transaction.transaction_id: raise Exception("Transaction missing transaction_id") - if not transaction.origin: - raise Exception("Transaction missing origin") logger.debug("[%s] Got transaction", transaction.transaction_id) # use a linearizer to ensure that we don't process the same transaction # multiple times in parallel. with (yield self._transaction_linearizer.queue( - (transaction.origin, transaction.transaction_id), + (origin, transaction.transaction_id), )): result = yield self._handle_incoming_transaction( - transaction, request_time, + origin, transaction, request_time, ) defer.returnValue(result) @defer.inlineCallbacks - def _handle_incoming_transaction(self, transaction, request_time): + def _handle_incoming_transaction(self, origin, transaction, request_time): """ Process an incoming transaction and return the HTTP response Args: + origin (unicode): the server making the request transaction (Transaction): incoming transaction request_time (int): timestamp that the HTTP request arrived at Returns: Deferred[(int, object)]: http response code and body """ - response = yield self.transaction_actions.have_responded(transaction) + response = yield self.transaction_actions.have_responded(origin, transaction) if response: logger.debug( @@ -149,7 +148,7 @@ class FederationServer(FederationBase): received_pdus_counter.inc(len(transaction.pdus)) - origin_host, _ = parse_server_name(transaction.origin) + origin_host, _ = parse_server_name(origin) pdus_by_room = {} @@ -190,7 +189,7 @@ class FederationServer(FederationBase): event_id = pdu.event_id try: yield self._handle_received_pdu( - transaction.origin, pdu + origin, pdu ) pdu_results[event_id] = {} except FederationError as e: @@ -212,7 +211,7 @@ class FederationServer(FederationBase): if hasattr(transaction, "edus"): for edu in (Edu(**x) for x in transaction.edus): yield self.received_edu( - transaction.origin, + origin, edu.edu_type, edu.content ) @@ -224,6 +223,7 @@ class FederationServer(FederationBase): logger.debug("Returning: %s", str(response)) yield self.transaction_actions.set_response( + origin, transaction, 200, response ) -- cgit 1.5.1 From 2d2828dcbc2c6360d28a64d3849cf849eb5348c4 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 6 Sep 2018 00:10:47 +1000 Subject: Port http/ to Python 3 (#3771) --- changelog.d/3771.misc | 1 + setup.cfg | 5 +- synapse/appservice/api.py | 13 +- synapse/federation/federation_server.py | 10 +- synapse/http/client.py | 82 +++++-------- synapse/http/matrixfederationclient.py | 204 +++++++++++++------------------- synapse/http/site.py | 4 +- synapse/python_dependencies.py | 1 + 8 files changed, 134 insertions(+), 186 deletions(-) create mode 100644 changelog.d/3771.misc (limited to 'synapse/federation/federation_server.py') diff --git a/changelog.d/3771.misc b/changelog.d/3771.misc new file mode 100644 index 0000000000..47aa34bc04 --- /dev/null +++ b/changelog.d/3771.misc @@ -0,0 +1 @@ +http/ is now ported to Python 3. diff --git a/setup.cfg b/setup.cfg index c2620be6c5..52feaa9cc7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,13 +17,14 @@ ignore = [pep8] max-line-length = 90 # W503 requires that binary operators be at the end, not start, of lines. Erik -# doesn't like it. E203 is contrary to PEP8. -ignore = W503,E203 +# doesn't like it. E203 is contrary to PEP8. E731 is silly. +ignore = W503,E203,E731 [flake8] # note that flake8 inherits the "ignore" settings from "pep8" (because it uses # pep8 to do those checks), but not the "max-line-length" setting max-line-length = 90 +ignore=W503,E203,E731 [isort] line_length = 89 diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 6980e5890e..9ccc5a80fc 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import urllib + +from six.moves import urllib from prometheus_client import Counter @@ -98,7 +99,7 @@ class ApplicationServiceApi(SimpleHttpClient): def query_user(self, service, user_id): if service.url is None: defer.returnValue(False) - uri = service.url + ("/users/%s" % urllib.quote(user_id)) + uri = service.url + ("/users/%s" % urllib.parse.quote(user_id)) response = None try: response = yield self.get_json(uri, { @@ -119,7 +120,7 @@ class ApplicationServiceApi(SimpleHttpClient): def query_alias(self, service, alias): if service.url is None: defer.returnValue(False) - uri = service.url + ("/rooms/%s" % urllib.quote(alias)) + uri = service.url + ("/rooms/%s" % urllib.parse.quote(alias)) response = None try: response = yield self.get_json(uri, { @@ -153,7 +154,7 @@ class ApplicationServiceApi(SimpleHttpClient): service.url, APP_SERVICE_PREFIX, kind, - urllib.quote(protocol) + urllib.parse.quote(protocol) ) try: response = yield self.get_json(uri, fields) @@ -188,7 +189,7 @@ class ApplicationServiceApi(SimpleHttpClient): uri = "%s%s/thirdparty/protocol/%s" % ( service.url, APP_SERVICE_PREFIX, - urllib.quote(protocol) + urllib.parse.quote(protocol) ) try: info = yield self.get_json(uri, {}) @@ -228,7 +229,7 @@ class ApplicationServiceApi(SimpleHttpClient): txn_id = str(txn_id) uri = service.url + ("/transactions/%s" % - urllib.quote(txn_id)) + urllib.parse.quote(txn_id)) try: yield self.put_json( uri=uri, diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e0cd294a1..6e52c4b6b5 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -838,9 +838,9 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): ) return self._send_edu( - edu_type=edu_type, - origin=origin, - content=content, + edu_type=edu_type, + origin=origin, + content=content, ) def on_query(self, query_type, args): @@ -851,6 +851,6 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): return handler(args) return self._get_query_client( - query_type=query_type, - args=args, + query_type=query_type, + args=args, ) diff --git a/synapse/http/client.py b/synapse/http/client.py index ab4fbf59b2..4ba54fed05 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -13,24 +13,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import logging -import urllib -from six import StringIO +from six import text_type +from six.moves import urllib +import treq from canonicaljson import encode_canonical_json, json from prometheus_client import Counter from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE -from twisted.internet import defer, protocol, reactor, ssl, task +from twisted.internet import defer, protocol, reactor, ssl from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone from twisted.web.client import ( Agent, BrowserLikeRedirectAgent, ContentDecoderAgent, - FileBodyProducer as TwistedFileBodyProducer, GzipDecoder, HTTPConnectionPool, PartialDownloadError, @@ -83,18 +84,20 @@ class SimpleHttpClient(object): if hs.config.user_agent_suffix: self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,) + self.user_agent = self.user_agent.encode('ascii') + @defer.inlineCallbacks - def request(self, method, uri, *args, **kwargs): + def request(self, method, uri, data=b'', headers=None): # A small wrapper around self.agent.request() so we can easily attach # counters to it outgoing_requests_counter.labels(method).inc() # log request but strip `access_token` (AS requests for example include this) - logger.info("Sending request %s %s", method, redact_uri(uri)) + logger.info("Sending request %s %s", method, redact_uri(uri.encode('ascii'))) try: - request_deferred = self.agent.request( - method, uri, *args, **kwargs + request_deferred = treq.request( + method, uri, agent=self.agent, data=data, headers=headers ) add_timeout_to_deferred( request_deferred, 60, self.hs.get_reactor(), @@ -105,14 +108,14 @@ class SimpleHttpClient(object): incoming_responses_counter.labels(method, response.code).inc() logger.info( "Received response to %s %s: %s", - method, redact_uri(uri), response.code + method, redact_uri(uri.encode('ascii')), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, redact_uri(uri), type(e).__name__, e.message + method, redact_uri(uri.encode('ascii')), type(e).__name__, e.args[0] ) raise @@ -137,7 +140,8 @@ class SimpleHttpClient(object): # TODO: Do we ever want to log message contents? logger.debug("post_urlencoded_get_json args: %s", args) - query_bytes = urllib.urlencode(encode_urlencode_args(args), True) + query_bytes = urllib.parse.urlencode( + encode_urlencode_args(args), True).encode("utf8") actual_headers = { b"Content-Type": [b"application/x-www-form-urlencoded"], @@ -148,15 +152,14 @@ class SimpleHttpClient(object): response = yield self.request( "POST", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(query_bytes)) + data=query_bytes ) - body = yield make_deferred_yieldable(readBody(response)) - if 200 <= response.code < 300: - defer.returnValue(json.loads(body)) + body = yield make_deferred_yieldable(treq.json_content(response)) + defer.returnValue(body) else: raise HttpResponseException(response.code, response.phrase, body) @@ -191,9 +194,9 @@ class SimpleHttpClient(object): response = yield self.request( "POST", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(json_str)) + data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -248,7 +251,7 @@ class SimpleHttpClient(object): ValueError: if the response was not JSON """ if len(args): - query_bytes = urllib.urlencode(args, True) + query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) json_str = encode_canonical_json(json_body) @@ -262,9 +265,9 @@ class SimpleHttpClient(object): response = yield self.request( "PUT", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(json_str)) + data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -293,7 +296,7 @@ class SimpleHttpClient(object): HttpResponseException on a non-2xx HTTP response. """ if len(args): - query_bytes = urllib.urlencode(args, True) + query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) actual_headers = { @@ -304,7 +307,7 @@ class SimpleHttpClient(object): response = yield self.request( "GET", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), ) @@ -339,7 +342,7 @@ class SimpleHttpClient(object): response = yield self.request( "GET", - url.encode("ascii"), + url, headers=Headers(actual_headers), ) @@ -434,12 +437,12 @@ class CaptchaServerHttpClient(SimpleHttpClient): @defer.inlineCallbacks def post_urlencoded_get_raw(self, url, args={}): - query_bytes = urllib.urlencode(encode_urlencode_args(args), True) + query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True) response = yield self.request( "POST", - url.encode("ascii"), - bodyProducer=FileBodyProducer(StringIO(query_bytes)), + url, + data=query_bytes, headers=Headers({ b"Content-Type": [b"application/x-www-form-urlencoded"], b"User-Agent": [self.user_agent], @@ -510,7 +513,7 @@ def encode_urlencode_args(args): def encode_urlencode_arg(arg): - if isinstance(arg, unicode): + if isinstance(arg, text_type): return arg.encode('utf-8') elif isinstance(arg, list): return [encode_urlencode_arg(i) for i in arg] @@ -542,26 +545,3 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory): def creatorForNetloc(self, hostname, port): return self - - -class FileBodyProducer(TwistedFileBodyProducer): - """Workaround for https://twistedmatrix.com/trac/ticket/8473 - - We override the pauseProducing and resumeProducing methods in twisted's - FileBodyProducer so that they do not raise exceptions if the task has - already completed. - """ - - def pauseProducing(self): - try: - super(FileBodyProducer, self).pauseProducing() - except task.TaskDone: - # task has already completed - pass - - def resumeProducing(self): - try: - super(FileBodyProducer, self).resumeProducing() - except task.NotPaused: - # task was not paused (probably because it had already completed) - pass diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b34bb8e31a..6a1fc8ca55 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -17,19 +17,19 @@ import cgi import logging import random import sys -import urllib -from six import string_types -from six.moves.urllib import parse as urlparse +from six import PY3, string_types +from six.moves import urllib -from canonicaljson import encode_canonical_json, json +import treq +from canonicaljson import encode_canonical_json from prometheus_client import Counter from signedjson.sign import sign_json from twisted.internet import defer, protocol, reactor from twisted.internet.error import DNSLookupError from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, HTTPConnectionPool, readBody +from twisted.web.client import Agent, HTTPConnectionPool from twisted.web.http_headers import Headers import synapse.metrics @@ -58,13 +58,18 @@ incoming_responses_counter = Counter("synapse_http_matrixfederationclient_respon MAX_LONG_RETRIES = 10 MAX_SHORT_RETRIES = 3 +if PY3: + MAXINT = sys.maxsize +else: + MAXINT = sys.maxint + class MatrixFederationEndpointFactory(object): def __init__(self, hs): self.tls_client_options_factory = hs.tls_client_options_factory def endpointForURI(self, uri): - destination = uri.netloc + destination = uri.netloc.decode('ascii') return matrix_federation_endpoint( reactor, destination, timeout=10, @@ -93,26 +98,32 @@ class MatrixFederationHttpClient(object): ) self.clock = hs.get_clock() self._store = hs.get_datastore() - self.version_string = hs.version_string + self.version_string = hs.version_string.encode('ascii') self._next_id = 1 def _create_url(self, destination, path_bytes, param_bytes, query_bytes): - return urlparse.urlunparse( - ("matrix", destination, path_bytes, param_bytes, query_bytes, "") + return urllib.parse.urlunparse( + (b"matrix", destination, path_bytes, param_bytes, query_bytes, b"") ) @defer.inlineCallbacks def _request(self, destination, method, path, - body_callback, headers_dict={}, param_bytes=b"", - query_bytes=b"", retry_on_dns_fail=True, + json=None, json_callback=None, + param_bytes=b"", + query=None, retry_on_dns_fail=True, timeout=None, long_retries=False, ignore_backoff=False, backoff_on_404=False): - """ Creates and sends a request to the given server + """ + Creates and sends a request to the given server. + Args: destination (str): The remote server to send the HTTP request to. method (str): HTTP method path (str): The HTTP path + json (dict or None): JSON to send in the body. + json_callback (func or None): A callback to generate the JSON. + query (dict or None): Query arguments. ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. backoff_on_404 (bool): Back off if we get a 404 @@ -146,22 +157,29 @@ class MatrixFederationHttpClient(object): ignore_backoff=ignore_backoff, ) - destination = destination.encode("ascii") + headers_dict = {} path_bytes = path.encode("ascii") - with limiter: - headers_dict[b"User-Agent"] = [self.version_string] - headers_dict[b"Host"] = [destination] + if query: + query_bytes = encode_query_args(query) + else: + query_bytes = b"" - url_bytes = self._create_url( - destination, path_bytes, param_bytes, query_bytes - ) + headers_dict = { + "User-Agent": [self.version_string], + "Host": [destination], + } + + with limiter: + url = self._create_url( + destination.encode("ascii"), path_bytes, param_bytes, query_bytes + ).decode('ascii') txn_id = "%s-O-%s" % (method, self._next_id) - self._next_id = (self._next_id + 1) % (sys.maxint - 1) + self._next_id = (self._next_id + 1) % (MAXINT - 1) outbound_logger.info( "{%s} [%s] Sending request: %s %s", - txn_id, destination, method, url_bytes + txn_id, destination, method, url ) # XXX: Would be much nicer to retry only at the transaction-layer @@ -171,23 +189,33 @@ class MatrixFederationHttpClient(object): else: retries_left = MAX_SHORT_RETRIES - http_url_bytes = urlparse.urlunparse( - ("", "", path_bytes, param_bytes, query_bytes, "") - ) + http_url = urllib.parse.urlunparse( + (b"", b"", path_bytes, param_bytes, query_bytes, b"") + ).decode('ascii') log_result = None try: while True: - producer = None - if body_callback: - producer = body_callback(method, http_url_bytes, headers_dict) - try: - request_deferred = self.agent.request( + if json_callback: + json = json_callback() + + if json: + data = encode_canonical_json(json) + headers_dict["Content-Type"] = ["application/json"] + self.sign_request( + destination, method, http_url, headers_dict, json + ) + else: + data = None + self.sign_request(destination, method, http_url, headers_dict) + + request_deferred = treq.request( method, - url_bytes, - Headers(headers_dict), - producer + url, + headers=Headers(headers_dict), + data=data, + agent=self.agent, ) add_timeout_to_deferred( request_deferred, @@ -218,7 +246,7 @@ class MatrixFederationHttpClient(object): txn_id, destination, method, - url_bytes, + url, _flatten_response_never_received(e), ) @@ -252,7 +280,7 @@ class MatrixFederationHttpClient(object): # :'( # Update transactions table? with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + body = yield treq.content(response) raise HttpResponseException( response.code, response.phrase, body ) @@ -297,11 +325,11 @@ class MatrixFederationHttpClient(object): auth_headers = [] for key, sig in request["signatures"][self.server_name].items(): - auth_headers.append(bytes( + auth_headers.append(( "X-Matrix origin=%s,key=\"%s\",sig=\"%s\"" % ( self.server_name, key, sig, - ) - )) + )).encode('ascii') + ) headers_dict[b"Authorization"] = auth_headers @@ -347,24 +375,14 @@ class MatrixFederationHttpClient(object): """ if not json_data_callback: - def json_data_callback(): - return data - - def body_callback(method, url_bytes, headers_dict): - json_data = json_data_callback() - self.sign_request( - destination, method, url_bytes, headers_dict, json_data - ) - producer = _JsonProducer(json_data) - return producer + json_data_callback = lambda: data response = yield self._request( destination, "PUT", path, - body_callback=body_callback, - headers_dict={"Content-Type": ["application/json"]}, - query_bytes=encode_query_args(args), + json_callback=json_data_callback, + query=args, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, @@ -376,8 +394,8 @@ class MatrixFederationHttpClient(object): check_content_type_is_json(response.headers) with logcontext.PreserveLoggingContext(): - body = yield readBody(response) - defer.returnValue(json.loads(body)) + body = yield treq.json_content(response) + defer.returnValue(body) @defer.inlineCallbacks def post_json(self, destination, path, data={}, long_retries=False, @@ -410,20 +428,12 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - - def body_callback(method, url_bytes, headers_dict): - self.sign_request( - destination, method, url_bytes, headers_dict, data - ) - return _JsonProducer(data) - response = yield self._request( destination, "POST", path, - query_bytes=encode_query_args(args), - body_callback=body_callback, - headers_dict={"Content-Type": ["application/json"]}, + query=args, + json=data, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, @@ -434,9 +444,9 @@ class MatrixFederationHttpClient(object): check_content_type_is_json(response.headers) with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + body = yield treq.json_content(response) - defer.returnValue(json.loads(body)) + defer.returnValue(body) @defer.inlineCallbacks def get_json(self, destination, path, args=None, retry_on_dns_fail=True, @@ -471,16 +481,11 @@ class MatrixFederationHttpClient(object): logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) - def body_callback(method, url_bytes, headers_dict): - self.sign_request(destination, method, url_bytes, headers_dict) - return None - response = yield self._request( destination, "GET", path, - query_bytes=encode_query_args(args), - body_callback=body_callback, + query=args, retry_on_dns_fail=retry_on_dns_fail, timeout=timeout, ignore_backoff=ignore_backoff, @@ -491,9 +496,9 @@ class MatrixFederationHttpClient(object): check_content_type_is_json(response.headers) with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + body = yield treq.json_content(response) - defer.returnValue(json.loads(body)) + defer.returnValue(body) @defer.inlineCallbacks def delete_json(self, destination, path, long_retries=False, @@ -523,13 +528,11 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - response = yield self._request( destination, "DELETE", path, - query_bytes=encode_query_args(args), - headers_dict={"Content-Type": ["application/json"]}, + query=args, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, @@ -540,9 +543,9 @@ class MatrixFederationHttpClient(object): check_content_type_is_json(response.headers) with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + body = yield treq.json_content(response) - defer.returnValue(json.loads(body)) + defer.returnValue(body) @defer.inlineCallbacks def get_file(self, destination, path, output_stream, args={}, @@ -569,26 +572,11 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - - encoded_args = {} - for k, vs in args.items(): - if isinstance(vs, string_types): - vs = [vs] - encoded_args[k] = [v.encode("UTF-8") for v in vs] - - query_bytes = urllib.urlencode(encoded_args, True) - logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail) - - def body_callback(method, url_bytes, headers_dict): - self.sign_request(destination, method, url_bytes, headers_dict) - return None - response = yield self._request( destination, "GET", path, - query_bytes=query_bytes, - body_callback=body_callback, + query=args, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff, ) @@ -639,30 +627,6 @@ def _readBodyToFile(response, stream, max_size): return d -class _JsonProducer(object): - """ Used by the twisted http client to create the HTTP body from json - """ - def __init__(self, jsn): - self.reset(jsn) - - def reset(self, jsn): - self.body = encode_canonical_json(jsn) - self.length = len(self.body) - - def startProducing(self, consumer): - consumer.write(self.body) - return defer.succeed(None) - - def pauseProducing(self): - pass - - def stopProducing(self): - pass - - def resumeProducing(self): - pass - - def _flatten_response_never_received(e): if hasattr(e, "reasons"): reasons = ", ".join( @@ -693,7 +657,7 @@ def check_content_type_is_json(headers): "No Content-Type header" ) - c_type = c_type[0] # only the first header + c_type = c_type[0].decode('ascii') # only the first header val, options = cgi.parse_header(c_type) if val != "application/json": raise RuntimeError( @@ -711,6 +675,6 @@ def encode_query_args(args): vs = [vs] encoded_args[k] = [v.encode("UTF-8") for v in vs] - query_bytes = urllib.urlencode(encoded_args, True) + query_bytes = urllib.parse.urlencode(encoded_args, True) - return query_bytes + return query_bytes.encode('utf8') diff --git a/synapse/http/site.py b/synapse/http/site.py index 88ed3714f9..f0828c6542 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -204,14 +204,14 @@ class SynapseRequest(Request): self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( - self.start_time, name=servlet_name, method=self.method, + self.start_time, name=servlet_name, method=self.method.decode('ascii'), ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, - self.method, + self.method.decode('ascii'), self.get_redacted_uri() ) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 942d7c721f..6dd5179320 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -40,6 +40,7 @@ REQUIREMENTS = { "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "Twisted>=17.1.0": ["twisted>=17.1.0"], + "treq>=15.1": ["treq>=15.1"], # We use crypto.get_elliptic_curve which is only supported in >=0.15 "pyopenssl>=0.15": ["OpenSSL>=0.15"], -- cgit 1.5.1 From 703de4ec13b95f0c8f2e04bf8c20b3906b010592 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 20 Sep 2018 13:06:55 +0100 Subject: Comments and interface cleanup for on_receive_pdu Add some informative comments about what's going on here. Also, `sent_to_us_directly` and `get_missing` were doing the same thing (apart from in `_handle_queued_pdus`, which looks like a bug), so let's get rid of `get_missing` and use `sent_to_us_directly` consistently. --- synapse/federation/federation_server.py | 2 +- synapse/handlers/federation.py | 69 ++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 24 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index dbee404ea7..9a571e4fc7 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -618,7 +618,7 @@ class FederationServer(FederationBase): ) yield self.handler.on_receive_pdu( - origin, pdu, get_missing=True, sent_to_us_directly=True, + origin, pdu, sent_to_us_directly=True, ) def __str__(self): diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8d6bd7976d..2ccdc3bfa7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -136,7 +136,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def on_receive_pdu( - self, origin, pdu, get_missing=True, sent_to_us_directly=False, + self, origin, pdu, sent_to_us_directly=False, ): """ Process a PDU received via a federation /send/ transaction, or via backfill of missing prev_events @@ -145,7 +145,8 @@ class FederationHandler(BaseHandler): origin (str): server which initiated the /send/ transaction. Will be used to fetch missing events or state. pdu (FrozenEvent): received PDU - get_missing (bool): True if we should fetch missing prev_events + sent_to_us_directly (bool): True if this event was pushed to us; False if + we pulled it as the result of a missing prev_event. Returns (Deferred): completes with None """ @@ -250,7 +251,7 @@ class FederationHandler(BaseHandler): pdu.internal_metadata.outlier = True elif min_depth and pdu.depth > min_depth: missing_prevs = prevs - seen - if get_missing and missing_prevs: + if sent_to_us_directly and missing_prevs: # If we're missing stuff, ensure we only fetch stuff one # at a time. logger.info( @@ -282,24 +283,46 @@ class FederationHandler(BaseHandler): room_id, event_id, len(missing_prevs), shortstr(missing_prevs), ) - if sent_to_us_directly and prevs - seen: - # If they have sent it to us directly, and the server - # isn't telling us about the auth events that it's - # made a message referencing, we explode - logger.warn( - "[%s %s] Failed to fetch %d prev events: rejecting", - room_id, event_id, len(prevs - seen), - ) - raise FederationError( - "ERROR", - 403, - ( - "Your server isn't divulging details about prev_events " - "referenced in this event." - ), - affected=pdu.event_id, - ) - elif prevs - seen: + if prevs - seen: + # We've still not been able to get all of the prev_events for this event. + # + # In this case, we need to fall back to asking another server in the + # federation for the state at this event. That's ok provided we then + # resolve the state against other bits of the DAG before using it (which + # will ensure that you can't just take over a room by sending an event, + # withholding its prev_events, and declaring yourself to be an admin in + # the subsequent state request). + # + # Now, if we're pulling this event as a missing prev_event, then clearly + # this event is not going to become the only forward-extremity and we are + # guaranteed to resolve its state against our existing forward + # extremities, so that should be fine. + # + # On the other hand, if this event was pushed to us, it is possible for + # it to become the only forward-extremity in the room, and we would then + # trust its state to be the state for the whole room. This is very bad. + # Further, if the event was pushed to us, there is no excuse for us not to + # have all the prev_events. We therefore reject any such events. + # + # XXX this really feels like it could/should be merged with the above, + # but there is an interaction with min_depth that I'm not really + # following. + + if sent_to_us_directly: + logger.warn( + "[%s %s] Failed to fetch %d prev events: rejecting", + room_id, event_id, len(prevs - seen), + ) + raise FederationError( + "ERROR", + 403, + ( + "Your server isn't divulging details about prev_events " + "referenced in this event." + ), + affected=pdu.event_id, + ) + # Calculate the state of the previous events, and # de-conflict them to find the current state. state_groups = [] @@ -464,7 +487,7 @@ class FederationHandler(BaseHandler): yield self.on_receive_pdu( origin, ev, - get_missing=False + sent_to_us_directly=False, ) except FederationError as e: if e.code == 403: @@ -1112,7 +1135,7 @@ class FederationHandler(BaseHandler): try: logger.info("Processing queued PDU %s which was received " "while we were joining %s", p.event_id, p.room_id) - yield self.on_receive_pdu(origin, p) + yield self.on_receive_pdu(origin, p, sent_to_us_directly=True) except Exception as e: logger.warn( "Error handling queued PDU %s from %s: %s", -- cgit 1.5.1 From 4a15a3e4d539dcea9a4a57e7cd800a926f2a17c3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 27 Sep 2018 11:25:34 +0100 Subject: Include eventid in log lines when processing incoming federation transactions (#3959) when processing incoming transactions, it can be hard to see what's going on, because we process a bunch of stuff in parallel, and because we may end up recursively working our way through a chain of three or four events. This commit creates a way to use logcontexts to add the relevant event ids to the log lines. --- changelog.d/3959.feature | 1 + synapse/federation/federation_server.py | 32 ++++++++-------- synapse/handlers/federation.py | 65 ++++++++++++++++++++------------- synapse/util/logcontext.py | 41 +++++++++++++++++++-- tests/test_federation.py | 28 ++++++++------ tests/util/test_logcontext.py | 5 +++ 6 files changed, 115 insertions(+), 57 deletions(-) create mode 100644 changelog.d/3959.feature (limited to 'synapse/federation/federation_server.py') diff --git a/changelog.d/3959.feature b/changelog.d/3959.feature new file mode 100644 index 0000000000..b3a4f37a8d --- /dev/null +++ b/changelog.d/3959.feature @@ -0,0 +1 @@ +Include eventid in log lines when processing incoming federation transactions \ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9a571e4fc7..819e8f7331 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -46,6 +46,7 @@ from synapse.replication.http.federation import ( from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import nested_logging_context from synapse.util.logutils import log_function # when processing incoming transactions, we try to handle multiple rooms in @@ -187,21 +188,22 @@ class FederationServer(FederationBase): for pdu in pdus_by_room[room_id]: event_id = pdu.event_id - try: - yield self._handle_received_pdu( - origin, pdu - ) - pdu_results[event_id] = {} - except FederationError as e: - logger.warn("Error handling PDU %s: %s", event_id, e) - pdu_results[event_id] = {"error": str(e)} - except Exception as e: - f = failure.Failure() - pdu_results[event_id] = {"error": str(e)} - logger.error( - "Failed to handle PDU %s: %s", - event_id, f.getTraceback().rstrip(), - ) + with nested_logging_context(event_id): + try: + yield self._handle_received_pdu( + origin, pdu + ) + pdu_results[event_id] = {} + except FederationError as e: + logger.warn("Error handling PDU %s: %s", event_id, e) + pdu_results[event_id] = {"error": str(e)} + except Exception as e: + f = failure.Failure() + pdu_results[event_id] = {"error": str(e)} + logger.error( + "Failed to handle PDU %s: %s", + event_id, f.getTraceback().rstrip(), + ) yield concurrently_execute( process_pdus_for_room, pdus_by_room.keys(), diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 2ccdc3bfa7..993546387c 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -339,14 +339,18 @@ class FederationHandler(BaseHandler): "[%s %s] Requesting state at missing prev_event %s", room_id, event_id, p, ) - state, got_auth_chain = ( - yield self.federation_client.get_state_for_room( - origin, room_id, p, + + with logcontext.nested_logging_context(p): + state, got_auth_chain = ( + yield self.federation_client.get_state_for_room( + origin, room_id, p, + ) ) - ) - auth_chains.update(got_auth_chain) - state_group = {(x.type, x.state_key): x.event_id for x in state} - state_groups.append(state_group) + auth_chains.update(got_auth_chain) + state_group = { + (x.type, x.state_key): x.event_id for x in state + } + state_groups.append(state_group) # Resolve any conflicting state def fetch(ev_ids): @@ -483,20 +487,21 @@ class FederationHandler(BaseHandler): "[%s %s] Handling received prev_event %s", room_id, event_id, ev.event_id, ) - try: - yield self.on_receive_pdu( - origin, - ev, - sent_to_us_directly=False, - ) - except FederationError as e: - if e.code == 403: - logger.warn( - "[%s %s] Received prev_event %s failed history check.", - room_id, event_id, ev.event_id, + with logcontext.nested_logging_context(ev.event_id): + try: + yield self.on_receive_pdu( + origin, + ev, + sent_to_us_directly=False, ) - else: - raise + except FederationError as e: + if e.code == 403: + logger.warn( + "[%s %s] Received prev_event %s failed history check.", + room_id, event_id, ev.event_id, + ) + else: + raise @defer.inlineCallbacks def _process_received_pdu(self, origin, event, state, auth_chain): @@ -1135,7 +1140,8 @@ class FederationHandler(BaseHandler): try: logger.info("Processing queued PDU %s which was received " "while we were joining %s", p.event_id, p.room_id) - yield self.on_receive_pdu(origin, p, sent_to_us_directly=True) + with logcontext.nested_logging_context(p.event_id): + yield self.on_receive_pdu(origin, p, sent_to_us_directly=True) except Exception as e: logger.warn( "Error handling queued PDU %s from %s: %s", @@ -1581,15 +1587,22 @@ class FederationHandler(BaseHandler): Notifies about the events where appropriate. """ - contexts = yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.run_in_background( - self._prep_event, + + @defer.inlineCallbacks + def prep(ev_info): + event = ev_info["event"] + with logcontext.nested_logging_context(suffix=event.event_id): + res = yield self._prep_event( origin, - ev_info["event"], + event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), ) + defer.returnValue(res) + + contexts = yield logcontext.make_deferred_yieldable(defer.gatherResults( + [ + logcontext.run_in_background(prep, ev_info) for ev_info in event_infos ], consumeErrors=True, )) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a0c2d37610..89224b26cc 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -200,7 +200,7 @@ class LoggingContext(object): sentinel = Sentinel() - def __init__(self, name=None, parent_context=None): + def __init__(self, name=None, parent_context=None, request=None): self.previous_context = LoggingContext.current_context() self.name = name @@ -218,6 +218,13 @@ class LoggingContext(object): self.parent_context = parent_context + if self.parent_context is not None: + self.parent_context.copy_to(self) + + if request is not None: + # the request param overrides the request from the parent context + self.request = request + def __str__(self): return "%s@%x" % (self.name, id(self)) @@ -256,9 +263,6 @@ class LoggingContext(object): ) self.alive = True - if self.parent_context is not None: - self.parent_context.copy_to(self) - return self def __exit__(self, type, value, traceback): @@ -439,6 +443,35 @@ class PreserveLoggingContext(object): ) +def nested_logging_context(suffix, parent_context=None): + """Creates a new logging context as a child of another. + + The nested logging context will have a 'request' made up of the parent context's + request, plus the given suffix. + + CPU/db usage stats will be added to the parent context's on exit. + + Normal usage looks like: + + with nested_logging_context(suffix): + # ... do stuff + + Args: + suffix (str): suffix to add to the parent context's 'request'. + parent_context (LoggingContext|None): parent context. Will use the current context + if None. + + Returns: + LoggingContext: new logging context. + """ + if parent_context is None: + parent_context = LoggingContext.current_context() + return LoggingContext( + parent_context=parent_context, + request=parent_context.request + "-" + suffix, + ) + + def preserve_fn(f): """Function decorator which wraps the function with run_in_background""" def g(*args, **kwargs): diff --git a/tests/test_federation.py b/tests/test_federation.py index 2540604fcc..ff55c7a627 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -6,6 +6,7 @@ from twisted.internet.defer import maybeDeferred, succeed from synapse.events import FrozenEvent from synapse.types import Requester, UserID from synapse.util import Clock +from synapse.util.logcontext import LoggingContext from tests import unittest from tests.server import ThreadedMemoryReactorClock, setup_test_homeserver @@ -117,9 +118,10 @@ class MessageAcceptTests(unittest.TestCase): } ) - d = self.handler.on_receive_pdu( - "test.serv", lying_event, sent_to_us_directly=True - ) + with LoggingContext(request="lying_event"): + d = self.handler.on_receive_pdu( + "test.serv", lying_event, sent_to_us_directly=True + ) # Step the reactor, so the database fetches come back self.reactor.advance(1) @@ -209,11 +211,12 @@ class MessageAcceptTests(unittest.TestCase): } ) - d = self.handler.on_receive_pdu( - "test.serv", good_event, sent_to_us_directly=True - ) - self.reactor.advance(1) - self.assertEqual(self.successResultOf(d), None) + with LoggingContext(request="good_event"): + d = self.handler.on_receive_pdu( + "test.serv", good_event, sent_to_us_directly=True + ) + self.reactor.advance(1) + self.assertEqual(self.successResultOf(d), None) bad_event = FrozenEvent( { @@ -230,10 +233,11 @@ class MessageAcceptTests(unittest.TestCase): } ) - d = self.handler.on_receive_pdu( - "test.serv", bad_event, sent_to_us_directly=True - ) - self.reactor.advance(1) + with LoggingContext(request="bad_event"): + d = self.handler.on_receive_pdu( + "test.serv", bad_event, sent_to_us_directly=True + ) + self.reactor.advance(1) extrem = maybeDeferred( self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 4633db77b3..8adaee3c8d 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -159,6 +159,11 @@ class LoggingContextTestCase(unittest.TestCase): self.assertEqual(r, "bum") self._check_test_key("one") + def test_nested_logging_context(self): + with LoggingContext(request="foo"): + nested_context = logcontext.nested_logging_context(suffix="bar") + self.assertEqual(nested_context.request, "foo-bar") + # a function which returns a deferred which has been "called", but # which had a function which returned another incomplete deferred on -- cgit 1.5.1 From fc0f13dd036cec4e41f5969d021d9dd10d6e5016 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Oct 2018 20:37:16 +0100 Subject: Fix incorrect truncation in get_missing_events It's quite important that get_missing_events returns the *latest* events in the room; however we were pulling event ids out of the database until we got *at least* 10, and then taking the *earliest* of the results. We also shouldn't really be relying on depth, and should be checking the room_id. --- changelog.d/4045.bugfix | 1 + synapse/federation/federation_server.py | 8 +++---- synapse/federation/transport/server.py | 2 -- synapse/handlers/federation.py | 12 +++++------ synapse/storage/event_federation.py | 38 ++++++++++++++------------------- 5 files changed, 26 insertions(+), 35 deletions(-) create mode 100644 changelog.d/4045.bugfix (limited to 'synapse/federation/federation_server.py') diff --git a/changelog.d/4045.bugfix b/changelog.d/4045.bugfix new file mode 100644 index 0000000000..fa50eb5aff --- /dev/null +++ b/changelog.d/4045.bugfix @@ -0,0 +1 @@ +Fix bug which made get_missing_events return too few events \ No newline at end of file diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 819e8f7331..4efe95faa4 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -507,19 +507,19 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function def on_get_missing_events(self, origin, room_id, earliest_events, - latest_events, limit, min_depth): + latest_events, limit): with (yield self._server_linearizer.queue((origin, room_id))): origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, room_id) logger.info( "on_get_missing_events: earliest_events: %r, latest_events: %r," - " limit: %d, min_depth: %d", - earliest_events, latest_events, limit, min_depth + " limit: %d", + earliest_events, latest_events, limit, ) missing_events = yield self.handler.on_get_missing_events( - origin, room_id, earliest_events, latest_events, limit, min_depth + origin, room_id, earliest_events, latest_events, limit, ) if len(missing_events) < 5: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2f874b4838..7288d49074 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -560,7 +560,6 @@ class FederationGetMissingEventsServlet(BaseFederationServlet): @defer.inlineCallbacks def on_POST(self, origin, content, query, room_id): limit = int(content.get("limit", 10)) - min_depth = int(content.get("min_depth", 0)) earliest_events = content.get("earliest_events", []) latest_events = content.get("latest_events", []) @@ -569,7 +568,6 @@ class FederationGetMissingEventsServlet(BaseFederationServlet): room_id=room_id, earliest_events=earliest_events, latest_events=latest_events, - min_depth=min_depth, limit=limit, ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 45d955e6f5..cab57a8849 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -309,8 +309,8 @@ class FederationHandler(BaseHandler): if sent_to_us_directly: logger.warn( - "[%s %s] Failed to fetch %d prev events: rejecting", - room_id, event_id, len(prevs - seen), + "[%s %s] Rejecting: failed to fetch %d prev events: %s", + room_id, event_id, len(prevs - seen), shortstr(prevs - seen) ) raise FederationError( "ERROR", @@ -452,8 +452,8 @@ class FederationHandler(BaseHandler): latest |= seen logger.info( - "[%s %s]: Requesting %d prev_events: %s", - room_id, event_id, len(prevs - seen), shortstr(prevs - seen) + "[%s %s]: Requesting missing events between %s and %s", + room_id, event_id, shortstr(latest), event_id, ) # XXX: we set timeout to 10s to help workaround @@ -1852,7 +1852,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def on_get_missing_events(self, origin, room_id, earliest_events, - latest_events, limit, min_depth): + latest_events, limit): in_room = yield self.auth.check_host_in_room( room_id, origin @@ -1861,14 +1861,12 @@ class FederationHandler(BaseHandler): raise AuthError(403, "Host not in room.") limit = min(limit, 20) - min_depth = max(min_depth, 0) missing_events = yield self.store.get_missing_events( room_id=room_id, earliest_events=earliest_events, latest_events=latest_events, limit=limit, - min_depth=min_depth, ) missing_events = yield filter_events_for_server( diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 24345b20a6..3faca2a042 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -376,33 +376,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, @defer.inlineCallbacks def get_missing_events(self, room_id, earliest_events, latest_events, - limit, min_depth): + limit): ids = yield self.runInteraction( "get_missing_events", self._get_missing_events, - room_id, earliest_events, latest_events, limit, min_depth + room_id, earliest_events, latest_events, limit, ) - events = yield self._get_events(ids) - - events = sorted( - [ev for ev in events if ev.depth >= min_depth], - key=lambda e: e.depth, - ) - - defer.returnValue(events[:limit]) + defer.returnValue(events) def _get_missing_events(self, txn, room_id, earliest_events, latest_events, - limit, min_depth): - - earliest_events = set(earliest_events) - front = set(latest_events) - earliest_events + limit): - event_results = set() + seen_events = set(earliest_events) + front = set(latest_events) - seen_events + event_results = [] query = ( "SELECT prev_event_id FROM event_edges " - "WHERE event_id = ? AND is_state = ? " + "WHERE room_id = ? AND event_id = ? AND is_state = ? " "LIMIT ?" ) @@ -411,18 +403,20 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, for event_id in front: txn.execute( query, - (event_id, False, limit - len(event_results)) + (room_id, event_id, False, limit - len(event_results)) ) - for e_id, in txn: - new_front.add(e_id) + new_results = set(t[0] for t in txn) - seen_events - new_front -= earliest_events - new_front -= event_results + new_front |= new_results + seen_events |= new_results + event_results.extend(new_results) front = new_front - event_results |= new_front + # we built the list working backwards from latest_events; we now need to + # reverse it so that the events are approximately chronological. + event_results.reverse() return event_results -- cgit 1.5.1 From 084046456ec88588779a62f9378c1a8e911bfc7c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Oct 2018 16:14:04 +0100 Subject: Add config option to control alias creation --- synapse/config/homeserver.py | 3 +- synapse/config/room_directory.py | 101 ++++++++++++++++++++++++++++++++ synapse/federation/federation_server.py | 16 +---- synapse/handlers/directory.py | 9 +++ synapse/util/__init__.py | 21 +++++++ 5 files changed, 135 insertions(+), 15 deletions(-) create mode 100644 synapse/config/room_directory.py (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index b8d5690f2b..10dd40159f 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -31,6 +31,7 @@ from .push import PushConfig from .ratelimiting import RatelimitConfig from .registration import RegistrationConfig from .repository import ContentRepositoryConfig +from .room_directory import RoomDirectoryConfig from .saml2 import SAML2Config from .server import ServerConfig from .server_notices_config import ServerNoticesConfig @@ -49,7 +50,7 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, WorkerConfig, PasswordAuthProviderConfig, PushConfig, SpamCheckerConfig, GroupsConfig, UserDirectoryConfig, ConsentConfig, - ServerNoticesConfig, + ServerNoticesConfig, RoomDirectoryConfig, ): pass diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py new file mode 100644 index 0000000000..41ef3217e8 --- /dev/null +++ b/synapse/config/room_directory.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util import glob_to_regex + +from ._base import Config, ConfigError + + +class RoomDirectoryConfig(Config): + def read_config(self, config): + alias_creation_rules = config["alias_creation_rules"] + + self._alias_creation_rules = [ + _AliasRule(rule) + for rule in alias_creation_rules + ] + + def default_config(self, config_dir_path, server_name, **kwargs): + return """ + # The `alias_creation` option controls who's allowed to create aliases + # on this server. + # + # The format of this option is a list of rules that contain globs that + # match against user_id and the new alias (fully qualified with server + # name). The action in the first rule that matches is taken, which can + # currently either be "allowed" or "denied". + # + # If no rules match the request is denied. + alias_creation_rules: + - user_id: "*" + alias: "*" + action: allowed + """ + + def is_alias_creation_allowed(self, user_id, alias): + """Checks if the given user is allowed to create the given alias + + Args: + user_id (str) + alias (str) + + Returns: + boolean: True if user is allowed to crate the alias + """ + for rule in self._alias_creation_rules: + if rule.matches(user_id, alias): + return rule.action == "allowed" + + return False + + +class _AliasRule(object): + def __init__(self, rule): + action = rule["action"] + user_id = rule["user_id"] + alias = rule["alias"] + + if action in ("allowed", "denied"): + self.action = action + else: + raise ConfigError( + "alias_creation_rules rules can only have action of 'allowed'" + " or 'denied'" + ) + + try: + self._user_id_regex = glob_to_regex(user_id) + self._alias_regex = glob_to_regex(alias) + except Exception as e: + raise ConfigError("Failed to parse glob into regex: %s", e) + + def matches(self, user_id, alias): + """Tests if this rule matches the given user_id and alias. + + Args: + user_id (str) + alias (str) + + Returns: + boolean + """ + + if not self._user_id_regex.search(user_id): + return False + + if not self._alias_regex.search(alias): + return False + + return True diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 4efe95faa4..d041c26824 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import re import six from six import iteritems @@ -44,6 +43,7 @@ from synapse.replication.http.federation import ( ReplicationGetQueryRestServlet, ) from synapse.types import get_domain_from_id +from synapse.util import glob_to_regex from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import nested_logging_context @@ -729,22 +729,10 @@ def _acl_entry_matches(server_name, acl_entry): if not isinstance(acl_entry, six.string_types): logger.warn("Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)) return False - regex = _glob_to_regex(acl_entry) + regex = glob_to_regex(acl_entry) return regex.match(server_name) -def _glob_to_regex(glob): - res = '' - for c in glob: - if c == '*': - res = res + '.*' - elif c == '?': - res = res + '.' - else: - res = res + re.escape(c) - return re.compile(res + "\\Z", re.IGNORECASE) - - class FederationHandlerRegistry(object): """Allows classes to register themselves as handlers for a given EDU or query type for incoming federation traffic. diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 02f12f6645..7d67bf803a 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -43,6 +43,7 @@ class DirectoryHandler(BaseHandler): self.state = hs.get_state_handler() self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() + self.config = hs.config self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -111,6 +112,14 @@ class DirectoryHandler(BaseHandler): 403, "This user is not permitted to create this alias", ) + if not self.config.is_alias_creation_allowed(user_id, room_alias.to_string()): + # Lets just return a generic message, as there may be all sorts of + # reasons why we said no. TODO: Allow configurable error messages + # per alias creation rule? + raise SynapseError( + 403, "Not allowed to create alias", + ) + can_create = yield self.can_modify_alias( room_alias, user_id=user_id diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 9a8fae0497..163e4b35ff 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. import logging +import re from itertools import islice import attr @@ -138,3 +139,23 @@ def log_failure(failure, msg, consumeErrors=True): if not consumeErrors: return failure + + +def glob_to_regex(glob): + """Converts a glob to a compiled regex object + + Args: + glob (str) + + Returns: + re.RegexObject + """ + res = '' + for c in glob: + if c == '*': + res = res + '.*' + elif c == '?': + res = res + '.' + else: + res = res + re.escape(c) + return re.compile(res + "\\Z", re.IGNORECASE) -- cgit 1.5.1 From ef771cc4c2d988e5188ba7e75df9adebb0ebafe1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Oct 2018 10:35:01 +0100 Subject: Fix a number of flake8 errors Broadly three things here: * disable W504 which seems a bit whacko * remove a bunch of `as e` expressions from exception handlers that don't use them * use `r""` for strings which include backslashes Also, we don't use pep8 any more, so we can get rid of the duplicate config there. --- changelog.d/4082.misc | 1 + scripts-dev/tail-synapse.py | 2 +- scripts/synapse_port_db | 3 ++- setup.cfg | 17 ++++++++--------- synapse/config/repository.py | 2 +- synapse/crypto/keyclient.py | 2 +- synapse/federation/federation_server.py | 2 +- synapse/rest/client/v2_alpha/auth.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/registration.py | 2 +- tests/config/test_generate.py | 2 +- tests/events/test_utils.py | 4 ++-- 12 files changed, 21 insertions(+), 20 deletions(-) create mode 100644 changelog.d/4082.misc (limited to 'synapse/federation/federation_server.py') diff --git a/changelog.d/4082.misc b/changelog.d/4082.misc new file mode 100644 index 0000000000..a81faf5e9b --- /dev/null +++ b/changelog.d/4082.misc @@ -0,0 +1 @@ +Clean up some bits of code which were flagged by the linter diff --git a/scripts-dev/tail-synapse.py b/scripts-dev/tail-synapse.py index 1a36b94038..7c9985d9f0 100644 --- a/scripts-dev/tail-synapse.py +++ b/scripts-dev/tail-synapse.py @@ -48,7 +48,7 @@ def main(): row.name: row.position for row in replicate(server, {"streams": "-1"})["streams"].rows } - except requests.exceptions.ConnectionError as e: + except requests.exceptions.ConnectionError: time.sleep(0.1) while True: diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 2f6e69e552..3c7b606323 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -501,7 +501,8 @@ class Porter(object): try: yield self.postgres_store.runInteraction("alter_table", alter_table) - except Exception as e: + except Exception: + # On Error Resume Next pass yield self.postgres_store.runInteraction( diff --git a/setup.cfg b/setup.cfg index 52feaa9cc7..b6b4aa740d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,17 +14,16 @@ ignore = pylint.cfg tox.ini -[pep8] -max-line-length = 90 -# W503 requires that binary operators be at the end, not start, of lines. Erik -# doesn't like it. E203 is contrary to PEP8. E731 is silly. -ignore = W503,E203,E731 - [flake8] -# note that flake8 inherits the "ignore" settings from "pep8" (because it uses -# pep8 to do those checks), but not the "max-line-length" setting max-line-length = 90 -ignore=W503,E203,E731 + +# see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes +# for error codes. The ones we ignore are: +# W503: line break before binary operator +# W504: line break after binary operator +# E203: whitespace before ':' (which is contrary to pep8?) +# E731: do not assign a lambda expression, use a def +ignore=W503,W504,E203,E731 [isort] line_length = 89 diff --git a/synapse/config/repository.py b/synapse/config/repository.py index fc909c1fac..06c62ab62c 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -178,7 +178,7 @@ class ContentRepositoryConfig(Config): def default_config(self, **kwargs): media_store = self.default_path("media_store") uploads_path = self.default_path("uploads") - return """ + return r""" # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index 57d4665e84..080c81f14b 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -55,7 +55,7 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): raise IOError("Cannot get key for %r" % server_name) except (ConnectError, DomainError) as e: logger.warn("Error getting key for %r: %s", server_name, e) - except Exception as e: + except Exception: logger.exception("Error getting key for %r", server_name) raise IOError("Cannot get key for %r" % server_name) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 4efe95faa4..af0107a46e 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -800,7 +800,7 @@ class FederationHandlerRegistry(object): yield handler(origin, content) except SynapseError as e: logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception as e: + except Exception: logger.exception("Failed to handle edu %r", edu_type) def on_query(self, query_type, args): diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py index bd8b5f4afa..693b303881 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/v2_alpha/auth.py @@ -99,7 +99,7 @@ class AuthRestServlet(RestServlet): cannot be handled in the normal flow (with requests to the same endpoint). Current use is for web fallback auth. """ - PATTERNS = client_v2_patterns("/auth/(?P[\w\.]*)/fallback/web") + PATTERNS = client_v2_patterns(r"/auth/(?P[\w\.]*)/fallback/web") def __init__(self, hs): super(AuthRestServlet, self).__init__() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 8c892ff187..1a7bfd6b56 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -674,7 +674,7 @@ def summarize_paragraphs(text_nodes, min_size=200, max_size=500): # This splits the paragraph into words, but keeping the # (preceeding) whitespace intact so we can easily concat # words back together. - for match in re.finditer("\s*\S+", description): + for match in re.finditer(r"\s*\S+", description): word = match.group() # Keep adding words while the total length is less than diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 26b429e307..2dd14aba1c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -567,7 +567,7 @@ class RegistrationStore(RegistrationWorkerStore, def _find_next_generated_user_id(txn): txn.execute("SELECT name FROM users") - regex = re.compile("^@(\d+):") + regex = re.compile(r"^@(\d+):") found = set() diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py index f88d28a19d..0c23068bcf 100644 --- a/tests/config/test_generate.py +++ b/tests/config/test_generate.py @@ -67,6 +67,6 @@ class ConfigGenerationTestCase(unittest.TestCase): with open(log_config_file) as f: config = f.read() # find the 'filename' line - matches = re.findall("^\s*filename:\s*(.*)$", config, re.M) + matches = re.findall(r"^\s*filename:\s*(.*)$", config, re.M) self.assertEqual(1, len(matches)) self.assertEqual(matches[0], expected) diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index ff217ca8b9..d0cc492deb 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -156,7 +156,7 @@ class SerializeEventTestCase(unittest.TestCase): room_id="!foo:bar", content={"key.with.dots": {}}, ), - ["content.key\.with\.dots"], + [r"content.key\.with\.dots"], ), {"content": {"key.with.dots": {}}}, ) @@ -172,7 +172,7 @@ class SerializeEventTestCase(unittest.TestCase): "nested.dot.key": {"leaf.key": 42, "not_me_either": 1}, }, ), - ["content.nested\.dot\.key.leaf\.key"], + [r"content.nested\.dot\.key.leaf\.key"], ), {"content": {"nested.dot.key": {"leaf.key": 42}}}, ) -- cgit 1.5.1